mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 11:36:20 +00:00
Compare commits
No commits in common. "290d29ea78c1ed07524e4e8bf4a66dc22634ac1e" and "eb2b68fab0a9bda212c455220f885183a705e7ad" have entirely different histories.
290d29ea78
...
eb2b68fab0
14 changed files with 58 additions and 149 deletions
|
@ -1,7 +1,6 @@
|
||||||
data/warcs/
|
data/warcs/
|
||||||
data/carrefour/
|
data/carrefour/
|
||||||
*/*.db*
|
*/*.db*
|
||||||
sqlite.db
|
|
||||||
downloader/
|
downloader/
|
||||||
node_modules/
|
node_modules/
|
||||||
*/node_modules/
|
*/node_modules/
|
||||||
|
|
|
@ -4,10 +4,6 @@ WORKDIR /usr/src/app
|
||||||
FROM base as build
|
FROM base as build
|
||||||
RUN apk add --no-cache nodejs npm
|
RUN apk add --no-cache nodejs npm
|
||||||
RUN npm install --global pnpm
|
RUN npm install --global pnpm
|
||||||
COPY db-datos/package.json db-datos/package.json
|
|
||||||
COPY sitio/package.json sitio/package.json
|
|
||||||
COPY pnpm-lock.yaml pnpm-workspace.yaml .
|
|
||||||
RUN cd sitio && pnpm install
|
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY db-datos/drizzle .
|
COPY db-datos/drizzle .
|
||||||
RUN cd sitio && \
|
RUN cd sitio && \
|
||||||
|
|
|
@ -3,13 +3,6 @@ import Database from "better-sqlite3";
|
||||||
import { drizzle } from "drizzle-orm/better-sqlite3";
|
import { drizzle } from "drizzle-orm/better-sqlite3";
|
||||||
import { DB_PATH } from "./drizzle.config.js";
|
import { DB_PATH } from "./drizzle.config.js";
|
||||||
import * as schema from "./schema.js";
|
import * as schema from "./schema.js";
|
||||||
import { migrateDb } from "./migrate.js";
|
|
||||||
|
|
||||||
/** @type {null | import("drizzle-orm/better-sqlite3").BetterSQLite3Database<schema>} */
|
export const sqlite = new Database(DB_PATH);
|
||||||
let db = null;
|
export const db = drizzle(sqlite, { schema });
|
||||||
export function getDb() {
|
|
||||||
const sqlite = new Database(DB_PATH);
|
|
||||||
db = drizzle(sqlite, { schema });
|
|
||||||
migrateDb(db);
|
|
||||||
return db;
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,13 +1,21 @@
|
||||||
// @ts-check
|
// @ts-check
|
||||||
|
import Database from "better-sqlite3";
|
||||||
|
import { join, dirname } from "node:path";
|
||||||
|
import { drizzle } from "drizzle-orm/better-sqlite3";
|
||||||
import { migrate } from "drizzle-orm/better-sqlite3/migrator";
|
import { migrate } from "drizzle-orm/better-sqlite3/migrator";
|
||||||
import * as schema from "./schema.js";
|
import * as schema from "./schema.js";
|
||||||
import { sql } from "drizzle-orm";
|
import { DB_PATH } from "./drizzle.config.js";
|
||||||
|
|
||||||
/**
|
const url = new URL(import.meta.url);
|
||||||
* @param {import("drizzle-orm/better-sqlite3").BetterSQLite3Database<schema>} db
|
export function migrateDb() {
|
||||||
*/
|
const sqlite = new Database(DB_PATH);
|
||||||
export function migrateDb(db) {
|
const db = drizzle(sqlite, { schema });
|
||||||
migrate(db, { migrationsFolder: "node_modules/db-datos/drizzle" });
|
|
||||||
db.run(sql`pragma journal_mode = WAL;`);
|
migrate(db, { migrationsFolder: join(dirname(url.pathname), "drizzle") });
|
||||||
db.run(sql`PRAGMA synchronous = NORMAL;`);
|
sqlite.exec(`
|
||||||
|
pragma journal_mode = WAL;
|
||||||
|
PRAGMA synchronous = NORMAL;
|
||||||
|
`);
|
||||||
|
|
||||||
|
sqlite.close();
|
||||||
}
|
}
|
||||||
|
|
25
db-datos/urlHelpers.ts
Normal file
25
db-datos/urlHelpers.ts
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
import { sql } from "drizzle-orm";
|
||||||
|
import { db } from "./db.js";
|
||||||
|
import { productoUrls } from "./schema.js";
|
||||||
|
|
||||||
|
export function saveUrls(urls: string[]) {
|
||||||
|
db.transaction((tx) => {
|
||||||
|
const now = new Date();
|
||||||
|
const insertUrlTra = tx
|
||||||
|
.insert(productoUrls)
|
||||||
|
.values({
|
||||||
|
url: sql.placeholder("url"),
|
||||||
|
firstSeen: now,
|
||||||
|
lastSeen: now,
|
||||||
|
})
|
||||||
|
.onConflictDoUpdate({
|
||||||
|
target: productoUrls.url,
|
||||||
|
set: { lastSeen: now },
|
||||||
|
})
|
||||||
|
.prepare();
|
||||||
|
|
||||||
|
for (const href of urls) {
|
||||||
|
insertUrlTra.run({ url: href });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
99
scraper-rs/Cargo.lock
generated
99
scraper-rs/Cargo.lock
generated
|
@ -61,21 +61,6 @@ version = "0.2.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "android-tzdata"
|
|
||||||
version = "0.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "android_system_properties"
|
|
||||||
version = "0.1.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.6.5"
|
version = "0.6.5"
|
||||||
|
@ -242,20 +227,6 @@ version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "chrono"
|
|
||||||
version = "0.4.32"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "41daef31d7a747c5c847246f36de49ced6f7403b4cdabc807a97b5cc184cda7a"
|
|
||||||
dependencies = [
|
|
||||||
"android-tzdata",
|
|
||||||
"iana-time-zone",
|
|
||||||
"js-sys",
|
|
||||||
"num-traits",
|
|
||||||
"wasm-bindgen",
|
|
||||||
"windows-targets 0.52.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.4.15"
|
version = "4.4.15"
|
||||||
|
@ -327,17 +298,6 @@ dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cron"
|
|
||||||
version = "0.12.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1ff76b51e4c068c52bfd2866e1567bee7c567ae8f24ada09fd4307019e25eab7"
|
|
||||||
dependencies = [
|
|
||||||
"chrono",
|
|
||||||
"nom",
|
|
||||||
"once_cell",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deadpool"
|
name = "deadpool"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
@ -676,29 +636,6 @@ dependencies = [
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "iana-time-zone"
|
|
||||||
version = "0.1.59"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539"
|
|
||||||
dependencies = [
|
|
||||||
"android_system_properties",
|
|
||||||
"core-foundation-sys",
|
|
||||||
"iana-time-zone-haiku",
|
|
||||||
"js-sys",
|
|
||||||
"wasm-bindgen",
|
|
||||||
"windows-core",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "iana-time-zone-haiku"
|
|
||||||
version = "0.1.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "idna"
|
name = "idna"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
|
@ -808,12 +745,6 @@ version = "0.3.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "minimal-lexical"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "miniz_oxide"
|
name = "miniz_oxide"
|
||||||
version = "0.7.1"
|
version = "0.7.1"
|
||||||
|
@ -843,16 +774,6 @@ dependencies = [
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nom"
|
|
||||||
version = "7.1.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
"minimal-lexical",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nu-ansi-term"
|
name = "nu-ansi-term"
|
||||||
version = "0.46.0"
|
version = "0.46.0"
|
||||||
|
@ -863,15 +784,6 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "num-traits"
|
|
||||||
version = "0.2.17"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num_cpus"
|
name = "num_cpus"
|
||||||
version = "1.16.0"
|
version = "1.16.0"
|
||||||
|
@ -1223,9 +1135,7 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"again",
|
"again",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
|
||||||
"clap",
|
"clap",
|
||||||
"cron",
|
|
||||||
"deadpool",
|
"deadpool",
|
||||||
"deadpool-sqlite",
|
"deadpool-sqlite",
|
||||||
"futures",
|
"futures",
|
||||||
|
@ -1778,15 +1688,6 @@ version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-core"
|
|
||||||
version = "0.52.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
|
||||||
dependencies = [
|
|
||||||
"windows-targets 0.52.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
|
|
|
@ -8,9 +8,7 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
again = "0.1.2"
|
again = "0.1.2"
|
||||||
anyhow = "1.0.79"
|
anyhow = "1.0.79"
|
||||||
chrono = "0.4.32"
|
|
||||||
clap = { version = "4.4.15", features = ["derive"] }
|
clap = { version = "4.4.15", features = ["derive"] }
|
||||||
cron = "0.12.0"
|
|
||||||
deadpool = "0.10.0"
|
deadpool = "0.10.0"
|
||||||
deadpool-sqlite = "0.7.0"
|
deadpool-sqlite = "0.7.0"
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
use again::RetryPolicy;
|
use again::RetryPolicy;
|
||||||
use clap::{Parser, ValueEnum};
|
use clap::{Parser, ValueEnum};
|
||||||
use cron::Schedule;
|
|
||||||
use deadpool_sqlite::Pool;
|
use deadpool_sqlite::Pool;
|
||||||
use futures::{future, stream, StreamExt};
|
use futures::{future, stream, StreamExt};
|
||||||
use nanoid::nanoid;
|
use nanoid::nanoid;
|
||||||
|
@ -10,10 +9,10 @@ use std::{
|
||||||
env::{self},
|
env::{self},
|
||||||
fs,
|
fs,
|
||||||
path::PathBuf,
|
path::PathBuf,
|
||||||
str::FromStr,
|
time::Duration,
|
||||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
|
||||||
};
|
};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
use tokio::time;
|
||||||
|
|
||||||
#[derive(ValueEnum, Clone, Debug)]
|
#[derive(ValueEnum, Clone, Debug)]
|
||||||
enum Supermercado {
|
enum Supermercado {
|
||||||
|
@ -400,24 +399,16 @@ async fn auto_cli() -> anyhow::Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
async fn cron_cli() -> anyhow::Result<()> {
|
async fn cron_cli() -> anyhow::Result<()> {
|
||||||
// https://crontab.guru
|
let mut interval = time::interval(std::time::Duration::from_secs(60 * 60 * 24));
|
||||||
let schedule = Schedule::from_str("0 0 2 * * * *").unwrap();
|
|
||||||
// let schedule = Schedule::from_str("0 26 21 * * * *").unwrap();
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let t = schedule
|
interval.tick().await;
|
||||||
.upcoming(chrono::Utc)
|
tokio::spawn(auto_cli());
|
||||||
.next()
|
|
||||||
.unwrap()
|
|
||||||
.signed_duration_since(chrono::Utc::now())
|
|
||||||
.to_std()
|
|
||||||
.unwrap();
|
|
||||||
println!("Waiting for {:?}", t);
|
|
||||||
tokio::time::sleep(t).await;
|
|
||||||
auto_cli().await.unwrap();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
mod sites;
|
mod sites;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -1,2 +1,4 @@
|
||||||
export { getDb } from "db-datos/db.js";
|
export { db } from "db-datos/db.js";
|
||||||
export * as schema from "db-datos/schema.js";
|
export * as schema from "db-datos/schema.js";
|
||||||
|
import { migrateDb } from "db-datos/migrate.js";
|
||||||
|
migrateDb();
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
import { countDistinct } from "drizzle-orm";
|
import { countDistinct } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { getDb, schema } from "$lib/server/db";
|
import { db, schema } from "$lib/server/db";
|
||||||
const { precios } = schema;
|
const { precios } = schema;
|
||||||
|
|
||||||
export const load: PageServerLoad = async () => {
|
export const load: PageServerLoad = async () => {
|
||||||
const db = await getDb();
|
|
||||||
const nProductosR = await db
|
const nProductosR = await db
|
||||||
.select({
|
.select({
|
||||||
count: countDistinct(precios.ean),
|
count: countDistinct(precios.ean),
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
import type { PageData, PageServerLoad } from "./$types";
|
import type { PageData, PageServerLoad } from "./$types";
|
||||||
import { getDb, schema } from "$lib/server/db";
|
import { db, schema } from "$lib/server/db";
|
||||||
const { precios } = schema;
|
const { precios } = schema;
|
||||||
import { sql } from "drizzle-orm";
|
import { sql } from "drizzle-orm";
|
||||||
|
|
||||||
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
|
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
|
||||||
|
|
||||||
async function doQuery() {
|
async function doQuery() {
|
||||||
const db = await getDb();
|
|
||||||
const q = db
|
const q = db
|
||||||
.select({
|
.select({
|
||||||
ean: precios.ean,
|
ean: precios.ean,
|
||||||
|
|
|
@ -1,11 +1,10 @@
|
||||||
import { error } from "@sveltejs/kit";
|
import { error } from "@sveltejs/kit";
|
||||||
import { eq, max } from "drizzle-orm";
|
import { eq, max } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { getDb, schema } from "$lib/server/db";
|
import { db, schema } from "$lib/server/db";
|
||||||
const { precios } = schema;
|
const { precios } = schema;
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({ params }) => {
|
export const load: PageServerLoad = async ({ params }) => {
|
||||||
const db = await getDb();
|
|
||||||
const q = db
|
const q = db
|
||||||
.select()
|
.select()
|
||||||
.from(precios)
|
.from(precios)
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
import { error } from "@sveltejs/kit";
|
import { error } from "@sveltejs/kit";
|
||||||
import { sql } from "drizzle-orm";
|
import { sql } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { getDb } from "$lib/server/db";
|
import { db } from "$lib/server/db";
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({ url }) => {
|
export const load: PageServerLoad = async ({ url }) => {
|
||||||
const db = await getDb();
|
|
||||||
const query = url.searchParams.get("q");
|
const query = url.searchParams.get("q");
|
||||||
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
|
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
|
||||||
if (query) {
|
if (query) {
|
||||||
|
|
|
@ -17,5 +17,5 @@
|
||||||
"forceConsistentCasingInFileNames": true
|
"forceConsistentCasingInFileNames": true
|
||||||
},
|
},
|
||||||
"include": ["**/*.ts", "**/*.js"],
|
"include": ["**/*.ts", "**/*.js"],
|
||||||
"exclude": ["./scraper-rs", "data"]
|
"exclude": ["sitio/build"]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue