mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 14:16:19 +00:00
Compare commits
8 commits
eb2b68fab0
...
290d29ea78
Author | SHA1 | Date | |
---|---|---|---|
290d29ea78 | |||
d58df3fd04 | |||
dbbd8e7f3a | |||
7ccc2432e3 | |||
28579d6883 | |||
378d4a4313 | |||
7ae225b1d6 | |||
4f1ea65de0 |
14 changed files with 149 additions and 58 deletions
|
@ -1,6 +1,7 @@
|
|||
data/warcs/
|
||||
data/carrefour/
|
||||
*/*.db*
|
||||
sqlite.db
|
||||
downloader/
|
||||
node_modules/
|
||||
*/node_modules/
|
||||
|
|
|
@ -4,6 +4,10 @@ WORKDIR /usr/src/app
|
|||
FROM base as build
|
||||
RUN apk add --no-cache nodejs npm
|
||||
RUN npm install --global pnpm
|
||||
COPY db-datos/package.json db-datos/package.json
|
||||
COPY sitio/package.json sitio/package.json
|
||||
COPY pnpm-lock.yaml pnpm-workspace.yaml .
|
||||
RUN cd sitio && pnpm install
|
||||
COPY . .
|
||||
COPY db-datos/drizzle .
|
||||
RUN cd sitio && \
|
||||
|
|
|
@ -3,6 +3,13 @@ import Database from "better-sqlite3";
|
|||
import { drizzle } from "drizzle-orm/better-sqlite3";
|
||||
import { DB_PATH } from "./drizzle.config.js";
|
||||
import * as schema from "./schema.js";
|
||||
import { migrateDb } from "./migrate.js";
|
||||
|
||||
export const sqlite = new Database(DB_PATH);
|
||||
export const db = drizzle(sqlite, { schema });
|
||||
/** @type {null | import("drizzle-orm/better-sqlite3").BetterSQLite3Database<schema>} */
|
||||
let db = null;
|
||||
export function getDb() {
|
||||
const sqlite = new Database(DB_PATH);
|
||||
db = drizzle(sqlite, { schema });
|
||||
migrateDb(db);
|
||||
return db;
|
||||
}
|
||||
|
|
|
@ -1,21 +1,13 @@
|
|||
// @ts-check
|
||||
import Database from "better-sqlite3";
|
||||
import { join, dirname } from "node:path";
|
||||
import { drizzle } from "drizzle-orm/better-sqlite3";
|
||||
import { migrate } from "drizzle-orm/better-sqlite3/migrator";
|
||||
import * as schema from "./schema.js";
|
||||
import { DB_PATH } from "./drizzle.config.js";
|
||||
import { sql } from "drizzle-orm";
|
||||
|
||||
const url = new URL(import.meta.url);
|
||||
export function migrateDb() {
|
||||
const sqlite = new Database(DB_PATH);
|
||||
const db = drizzle(sqlite, { schema });
|
||||
|
||||
migrate(db, { migrationsFolder: join(dirname(url.pathname), "drizzle") });
|
||||
sqlite.exec(`
|
||||
pragma journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
`);
|
||||
|
||||
sqlite.close();
|
||||
/**
|
||||
* @param {import("drizzle-orm/better-sqlite3").BetterSQLite3Database<schema>} db
|
||||
*/
|
||||
export function migrateDb(db) {
|
||||
migrate(db, { migrationsFolder: "node_modules/db-datos/drizzle" });
|
||||
db.run(sql`pragma journal_mode = WAL;`);
|
||||
db.run(sql`PRAGMA synchronous = NORMAL;`);
|
||||
}
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
import { sql } from "drizzle-orm";
|
||||
import { db } from "./db.js";
|
||||
import { productoUrls } from "./schema.js";
|
||||
|
||||
export function saveUrls(urls: string[]) {
|
||||
db.transaction((tx) => {
|
||||
const now = new Date();
|
||||
const insertUrlTra = tx
|
||||
.insert(productoUrls)
|
||||
.values({
|
||||
url: sql.placeholder("url"),
|
||||
firstSeen: now,
|
||||
lastSeen: now,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: productoUrls.url,
|
||||
set: { lastSeen: now },
|
||||
})
|
||||
.prepare();
|
||||
|
||||
for (const href of urls) {
|
||||
insertUrlTra.run({ url: href });
|
||||
}
|
||||
});
|
||||
}
|
99
scraper-rs/Cargo.lock
generated
99
scraper-rs/Cargo.lock
generated
|
@ -61,6 +61,21 @@ version = "0.2.16"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||
|
||||
[[package]]
|
||||
name = "android-tzdata"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.5"
|
||||
|
@ -227,6 +242,20 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41daef31d7a747c5c847246f36de49ced6f7403b4cdabc807a97b5cc184cda7a"
|
||||
dependencies = [
|
||||
"android-tzdata",
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-targets 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.4.15"
|
||||
|
@ -298,6 +327,17 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cron"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ff76b51e4c068c52bfd2866e1567bee7c567ae8f24ada09fd4307019e25eab7"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"nom",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deadpool"
|
||||
version = "0.10.0"
|
||||
|
@ -636,6 +676,29 @@ dependencies = [
|
|||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"iana-time-zone-haiku",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone-haiku"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.5.0"
|
||||
|
@ -745,6 +808,12 @@ version = "0.3.17"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
|
@ -774,6 +843,16 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
|
@ -784,6 +863,15 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.16.0"
|
||||
|
@ -1135,7 +1223,9 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"again",
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"clap",
|
||||
"cron",
|
||||
"deadpool",
|
||||
"deadpool-sqlite",
|
||||
"futures",
|
||||
|
@ -1688,6 +1778,15 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
|
|
|
@ -8,7 +8,9 @@ edition = "2021"
|
|||
[dependencies]
|
||||
again = "0.1.2"
|
||||
anyhow = "1.0.79"
|
||||
chrono = "0.4.32"
|
||||
clap = { version = "4.4.15", features = ["derive"] }
|
||||
cron = "0.12.0"
|
||||
deadpool = "0.10.0"
|
||||
deadpool-sqlite = "0.7.0"
|
||||
futures = "0.3.30"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use again::RetryPolicy;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use cron::Schedule;
|
||||
use deadpool_sqlite::Pool;
|
||||
use futures::{future, stream, StreamExt};
|
||||
use nanoid::nanoid;
|
||||
|
@ -9,10 +10,10 @@ use std::{
|
|||
env::{self},
|
||||
fs,
|
||||
path::PathBuf,
|
||||
time::Duration,
|
||||
str::FromStr,
|
||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use thiserror::Error;
|
||||
use tokio::time;
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug)]
|
||||
enum Supermercado {
|
||||
|
@ -399,16 +400,24 @@ async fn auto_cli() -> anyhow::Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
async fn cron_cli() -> anyhow::Result<()> {
|
||||
let mut interval = time::interval(std::time::Duration::from_secs(60 * 60 * 24));
|
||||
// https://crontab.guru
|
||||
let schedule = Schedule::from_str("0 0 2 * * * *").unwrap();
|
||||
// let schedule = Schedule::from_str("0 26 21 * * * *").unwrap();
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
tokio::spawn(auto_cli());
|
||||
let t = schedule
|
||||
.upcoming(chrono::Utc)
|
||||
.next()
|
||||
.unwrap()
|
||||
.signed_duration_since(chrono::Utc::now())
|
||||
.to_std()
|
||||
.unwrap();
|
||||
println!("Waiting for {:?}", t);
|
||||
tokio::time::sleep(t).await;
|
||||
auto_cli().await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
mod sites;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
|
@ -1,4 +1,2 @@
|
|||
export { db } from "db-datos/db.js";
|
||||
export { getDb } from "db-datos/db.js";
|
||||
export * as schema from "db-datos/schema.js";
|
||||
import { migrateDb } from "db-datos/migrate.js";
|
||||
migrateDb();
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import { countDistinct } from "drizzle-orm";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
import { getDb, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
|
||||
export const load: PageServerLoad = async () => {
|
||||
const db = await getDb();
|
||||
const nProductosR = await db
|
||||
.select({
|
||||
count: countDistinct(precios.ean),
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
import type { PageData, PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
import { getDb, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
import { sql } from "drizzle-orm";
|
||||
|
||||
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
|
||||
|
||||
async function doQuery() {
|
||||
const db = await getDb();
|
||||
const q = db
|
||||
.select({
|
||||
ean: precios.ean,
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
import { error } from "@sveltejs/kit";
|
||||
import { eq, max } from "drizzle-orm";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
import { getDb, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
|
||||
export const load: PageServerLoad = async ({ params }) => {
|
||||
const db = await getDb();
|
||||
const q = db
|
||||
.select()
|
||||
.from(precios)
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import { error } from "@sveltejs/kit";
|
||||
import { sql } from "drizzle-orm";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db } from "$lib/server/db";
|
||||
import { getDb } from "$lib/server/db";
|
||||
|
||||
export const load: PageServerLoad = async ({ url }) => {
|
||||
const db = await getDb();
|
||||
const query = url.searchParams.get("q");
|
||||
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
|
||||
if (query) {
|
||||
|
|
|
@ -17,5 +17,5 @@
|
|||
"forceConsistentCasingInFileNames": true
|
||||
},
|
||||
"include": ["**/*.ts", "**/*.js"],
|
||||
"exclude": ["sitio/build"]
|
||||
"exclude": ["./scraper-rs", "data"]
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue