Compare commits

..

No commits in common. "290d29ea78c1ed07524e4e8bf4a66dc22634ac1e" and "eb2b68fab0a9bda212c455220f885183a705e7ad" have entirely different histories.

14 changed files with 58 additions and 149 deletions

View file

@ -1,7 +1,6 @@
data/warcs/
data/carrefour/
*/*.db*
sqlite.db
downloader/
node_modules/
*/node_modules/

View file

@ -4,10 +4,6 @@ WORKDIR /usr/src/app
FROM base as build
RUN apk add --no-cache nodejs npm
RUN npm install --global pnpm
COPY db-datos/package.json db-datos/package.json
COPY sitio/package.json sitio/package.json
COPY pnpm-lock.yaml pnpm-workspace.yaml .
RUN cd sitio && pnpm install
COPY . .
COPY db-datos/drizzle .
RUN cd sitio && \

View file

@ -3,13 +3,6 @@ import Database from "better-sqlite3";
import { drizzle } from "drizzle-orm/better-sqlite3";
import { DB_PATH } from "./drizzle.config.js";
import * as schema from "./schema.js";
import { migrateDb } from "./migrate.js";
/**
 * Lazily created Drizzle handle shared by every caller of `getDb`.
 * Stays `null` until the first successful `getDb()` call.
 *
 * @type {null | import("drizzle-orm/better-sqlite3").BetterSQLite3Database<typeof schema>}
 */
let db = null;

/**
 * Returns the shared Drizzle database handle, creating it on first use.
 *
 * On the first call this opens the SQLite file at `DB_PATH`, wraps it with
 * Drizzle using the project schema and runs `migrateDb` on it. Later calls
 * return the same handle, so no additional connection is opened and
 * `migrateDb` is not invoked again.
 *
 * @returns {import("drizzle-orm/better-sqlite3").BetterSQLite3Database<typeof schema>}
 * @throws Re-throws whatever opening the database or `migrateDb` throws; in
 *   that case nothing is cached and the next call retries from scratch.
 */
export function getDb() {
  if (db !== null) return db;

  const sqlite = new Database(DB_PATH);
  try {
    const instance = drizzle(sqlite, { schema });
    migrateDb(instance);
    // Cache only after migrations succeeded, so a failed start-up is retried.
    db = instance;
    return instance;
  } catch (err) {
    // Do not leave a half-initialised connection open.
    sqlite.close();
    throw err;
  }
}
export const sqlite = new Database(DB_PATH);
export const db = drizzle(sqlite, { schema });

View file

@ -1,13 +1,21 @@
// @ts-check
import Database from "better-sqlite3";
import { join, dirname } from "node:path";
import { drizzle } from "drizzle-orm/better-sqlite3";
import { migrate } from "drizzle-orm/better-sqlite3/migrator";
import * as schema from "./schema.js";
import { sql } from "drizzle-orm";
import { DB_PATH } from "./drizzle.config.js";
/**
* @param {import("drizzle-orm/better-sqlite3").BetterSQLite3Database<schema>} db
*/
export function migrateDb(db) {
migrate(db, { migrationsFolder: "node_modules/db-datos/drizzle" });
db.run(sql`pragma journal_mode = WAL;`);
db.run(sql`PRAGMA synchronous = NORMAL;`);
const url = new URL(import.meta.url);
/**
 * Applies pending Drizzle migrations to the SQLite file at `DB_PATH` and sets
 * the journal/synchronous pragmas, using a short-lived connection of its own.
 *
 * The migrations folder is resolved as `drizzle/` next to this module.
 * The connection is closed on every exit path, including when `migrate` or
 * `exec` throws.
 *
 * NOTE(review): `url.pathname` is percent-encoded, so a checkout path that
 * contains spaces or non-ASCII characters would not resolve;
 * `fileURLToPath(import.meta.url)` from `node:url` is the robust form but
 * needs an extra import.
 *
 * @throws Re-throws any error raised while opening, migrating or configuring
 *   the database.
 */
export function migrateDb() {
  const sqlite = new Database(DB_PATH);
  try {
    const db = drizzle(sqlite, { schema });
    migrate(db, { migrationsFolder: join(dirname(url.pathname), "drizzle") });
    sqlite.exec(`
pragma journal_mode = WAL;
PRAGMA synchronous = NORMAL;
`);
  } finally {
    // Release the file handle even if a migration failed.
    sqlite.close();
  }
}

25
db-datos/urlHelpers.ts Normal file
View file

@ -0,0 +1,25 @@
import { sql } from "drizzle-orm";
import { db } from "./db.js";
import { productoUrls } from "./schema.js";
/**
 * Records every URL in `urls` in the `productoUrls` table within a single
 * transaction.
 *
 * A URL that is not stored yet is inserted with `firstSeen` and `lastSeen`
 * both set to the time this call started; a URL that already exists (conflict
 * on `productoUrls.url`) only has its `lastSeen` updated to that same time.
 * One prepared statement is built and reused for every element.
 *
 * @param urls - URLs to insert or refresh.
 */
export function saveUrls(urls: string[]) {
  db.transaction((tx) => {
    // One timestamp for the whole batch so all rows of a run agree.
    const seenAt = new Date();
    const row = {
      url: sql.placeholder("url"),
      firstSeen: seenAt,
      lastSeen: seenAt,
    };

    const upsertUrl = tx
      .insert(productoUrls)
      .values(row)
      .onConflictDoUpdate({ target: productoUrls.url, set: { lastSeen: seenAt } })
      .prepare();

    for (let i = 0; i < urls.length; i++) {
      upsertUrl.run({ url: urls[i] });
    }
  });
}

99
scraper-rs/Cargo.lock generated
View file

@ -61,21 +61,6 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.5"
@ -242,20 +227,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41daef31d7a747c5c847246f36de49ced6f7403b4cdabc807a97b5cc184cda7a"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.52.0",
]
[[package]]
name = "clap"
version = "4.4.15"
@ -327,17 +298,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "cron"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ff76b51e4c068c52bfd2866e1567bee7c567ae8f24ada09fd4307019e25eab7"
dependencies = [
"chrono",
"nom",
"once_cell",
]
[[package]]
name = "deadpool"
version = "0.10.0"
@ -676,29 +636,6 @@ dependencies = [
"tokio-rustls",
]
[[package]]
name = "iana-time-zone"
version = "0.1.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "idna"
version = "0.5.0"
@ -808,12 +745,6 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@ -843,16 +774,6 @@ dependencies = [
"rand 0.8.5",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@ -863,15 +784,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
@ -1223,9 +1135,7 @@ version = "0.1.0"
dependencies = [
"again",
"anyhow",
"chrono",
"clap",
"cron",
"deadpool",
"deadpool-sqlite",
"futures",
@ -1778,15 +1688,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-sys"
version = "0.48.0"

View file

@ -8,9 +8,7 @@ edition = "2021"
[dependencies]
again = "0.1.2"
anyhow = "1.0.79"
chrono = "0.4.32"
clap = { version = "4.4.15", features = ["derive"] }
cron = "0.12.0"
deadpool = "0.10.0"
deadpool-sqlite = "0.7.0"
futures = "0.3.30"

View file

@ -1,6 +1,5 @@
use again::RetryPolicy;
use clap::{Parser, ValueEnum};
use cron::Schedule;
use deadpool_sqlite::Pool;
use futures::{future, stream, StreamExt};
use nanoid::nanoid;
@ -10,10 +9,10 @@ use std::{
env::{self},
fs,
path::PathBuf,
str::FromStr,
time::{Duration, SystemTime, UNIX_EPOCH},
time::Duration,
};
use thiserror::Error;
use tokio::time;
#[derive(ValueEnum, Clone, Debug)]
enum Supermercado {
@ -400,24 +399,16 @@ async fn auto_cli() -> anyhow::Result<()> {
Ok(())
}
/// Runs `auto_cli` over and over in an endless loop.
///
/// The `loop` has no `break`, so this function only ends through one of the
/// `unwrap` panics below; it never returns `Ok(())` itself.
///
/// NOTE(review): the body contains two independent waiting mechanisms (a
/// cron `Schedule` followed by a sleep, and a 24-hour `tokio` interval) and
/// two `auto_cli` invocations per iteration. This looks like two revisions of
/// the function merged together — confirm which one is intended.
async fn cron_cli() -> anyhow::Result<()> {
// https://crontab.guru
// Seven-field expression parsed by the `cron` crate; `unwrap` panics if it is
// rejected. Presumably "02:00:00 every day" — confirm the field order against
// the crate documentation.
let schedule = Schedule::from_str("0 0 2 * * * *").unwrap();
// let schedule = Schedule::from_str("0 26 21 * * * *").unwrap();
// Timer with a period of 60 * 60 * 24 seconds (24 hours).
let mut interval = time::interval(std::time::Duration::from_secs(60 * 60 * 24));
loop {
// Time remaining until the schedule's next upcoming UTC occurrence,
// converted to a `std` duration. Each `unwrap` panics if its step fails.
let t = schedule
.upcoming(chrono::Utc)
.next()
.unwrap()
.signed_duration_since(chrono::Utc::now())
.to_std()
.unwrap();
println!("Waiting for {:?}", t);
tokio::time::sleep(t).await;
// Runs one pass inline; an `Err` from `auto_cli` panics here.
auto_cli().await.unwrap();
// Waits for the next interval tick.
interval.tick().await;
// Starts another pass as a separate task; the returned handle is dropped,
// so that pass's result is never inspected.
tokio::spawn(auto_cli());
}
}
use std::time::{SystemTime, UNIX_EPOCH};
mod sites;
#[derive(Debug)]

View file

@ -1,2 +1,4 @@
export { getDb } from "db-datos/db.js";
export { db } from "db-datos/db.js";
export * as schema from "db-datos/schema.js";
import { migrateDb } from "db-datos/migrate.js";
migrateDb();

View file

@ -1,10 +1,9 @@
import { countDistinct } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
import { db, schema } from "$lib/server/db";
const { precios } = schema;
export const load: PageServerLoad = async () => {
const db = await getDb();
const nProductosR = await db
.select({
count: countDistinct(precios.ean),

View file

@ -1,12 +1,11 @@
import type { PageData, PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
import { db, schema } from "$lib/server/db";
const { precios } = schema;
import { sql } from "drizzle-orm";
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
async function doQuery() {
const db = await getDb();
const q = db
.select({
ean: precios.ean,

View file

@ -1,11 +1,10 @@
import { error } from "@sveltejs/kit";
import { eq, max } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
import { db, schema } from "$lib/server/db";
const { precios } = schema;
export const load: PageServerLoad = async ({ params }) => {
const db = await getDb();
const q = db
.select()
.from(precios)

View file

@ -1,10 +1,9 @@
import { error } from "@sveltejs/kit";
import { sql } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { getDb } from "$lib/server/db";
import { db } from "$lib/server/db";
export const load: PageServerLoad = async ({ url }) => {
const db = await getDb();
const query = url.searchParams.get("q");
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
if (query) {

View file

@ -17,5 +17,5 @@
"forceConsistentCasingInFileNames": true
},
"include": ["**/*.ts", "**/*.js"],
"exclude": ["./scraper-rs", "data"]
"exclude": ["sitio/build"]
}