Compare commits

..

No commits in common. "3b63fd077560fafcb40c36eb575c70af697c0c86" and "c53897891b06c243a91ebbf299ab72fc979556bb" have entirely different histories.

8 changed files with 124 additions and 124 deletions

2
.gitignore vendored
View file

@ -5,7 +5,7 @@ data/carrefour
p.* p.*
p p
node_modules/ node_modules/
*.db* *.db
*.db-shm *.db-shm
*.db-wal *.db-wal
scraper/debug/ scraper/debug/

View file

@ -42,9 +42,6 @@ importers:
drizzle-orm: drizzle-orm:
specifier: ^0.29.1 specifier: ^0.29.1
version: 0.29.3(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2) version: 0.29.3(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
zod:
specifier: ^3.22.4
version: 3.22.4
devDependencies: devDependencies:
'@sveltejs/adapter-node': '@sveltejs/adapter-node':
specifier: ^2.0.2 specifier: ^2.0.2
@ -2862,3 +2859,4 @@ packages:
/zod@3.22.4: /zod@3.22.4:
resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==} resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==}
dev: true

View file

@ -47,10 +47,7 @@ struct ScrapUrlArgs {
url: String, url: String,
} }
#[derive(clap::Args)] #[derive(clap::Args)]
struct AutoArgs { struct AutoArgs {}
#[arg(long)]
n_products: Option<usize>,
}
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
@ -62,7 +59,7 @@ async fn main() -> anyhow::Result<()> {
Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await, Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
Args::ScrapUrl(a) => scrap_url_cli(a.url).await, Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
Args::ScrapBestSelling => scrap_best_selling_cli().await, Args::ScrapBestSelling => scrap_best_selling_cli().await,
Args::Auto(a) => auto_cli(a).await, Args::Auto(_) => auto_cli().await,
Args::Cron(_) => cron_cli().await, Args::Cron(_) => cron_cli().await,
} }
} }
@ -169,7 +166,7 @@ fn build_client() -> reqwest::Client {
headers.append("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36".parse().unwrap()); headers.append("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36".parse().unwrap());
reqwest::ClientBuilder::default() reqwest::ClientBuilder::default()
.timeout(Duration::from_secs(60 * 5)) .timeout(Duration::from_secs(60 * 5))
.connect_timeout(Duration::from_secs(30)) .connect_timeout(Duration::from_secs(60))
.default_headers(headers) .default_headers(headers)
.build() .build()
.unwrap() .unwrap()
@ -179,19 +176,10 @@ pub async fn do_request(client: &reqwest::Client, url: &str) -> reqwest::Result<
let response = client.execute(request).await?.error_for_status()?; let response = client.execute(request).await?.error_for_status()?;
Ok(response) Ok(response)
} }
async fn request_and_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
let res = do_request(client, url).await?;
res.text().await
}
pub async fn fetch_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
get_retry_policy()
.retry_if(|| request_and_body(client, url), retry_if_wasnt_not_found)
.await
}
pub fn get_retry_policy() -> again::RetryPolicy { pub fn get_retry_policy() -> again::RetryPolicy {
RetryPolicy::exponential(Duration::from_millis(300)) RetryPolicy::exponential(Duration::from_millis(300))
.with_max_retries(20) .with_max_retries(10)
.with_jitter(true) .with_jitter(true)
} }
@ -204,7 +192,11 @@ async fn fetch_and_parse(
client: &reqwest::Client, client: &reqwest::Client,
url: String, url: String,
) -> Result<PrecioPoint, anyhow::Error> { ) -> Result<PrecioPoint, anyhow::Error> {
let body = fetch_body(client, &url).await?; let body = get_retry_policy()
.retry_if(|| do_request(client, &url), retry_if_wasnt_not_found)
.await?
.text()
.await?;
let maybe_point = { scrap_url(client, url, &body).await }; let maybe_point = { scrap_url(client, url, &body).await };
@ -295,7 +287,6 @@ struct AutoTelegram {
struct Auto { struct Auto {
db: Db, db: Db,
telegram: Option<AutoTelegram>, telegram: Option<AutoTelegram>,
limit_n_products: Option<usize>,
} }
impl Auto { impl Auto {
async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> { async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
@ -309,13 +300,7 @@ impl Auto {
)) ))
.await; .await;
} }
let links: Vec<String> = { let links: Vec<String> = self.db.get_urls_by_domain(supermercado.host()).await?;
let mut links = self.db.get_urls_by_domain(supermercado.host()).await?;
if let Some(n) = self.limit_n_products {
links.truncate(n);
}
links
};
// { // {
// let debug_path = PathBuf::from("debug/"); // let debug_path = PathBuf::from("debug/");
// tokio::fs::create_dir_all(&debug_path).await.unwrap(); // tokio::fs::create_dir_all(&debug_path).await.unwrap();
@ -355,7 +340,7 @@ impl Auto {
} }
async fn inform(&self, msg: &str) { async fn inform(&self, msg: &str) {
tracing::info!("{}", msg); println!("{}", msg);
if let Some(telegram) = &self.telegram { if let Some(telegram) = &self.telegram {
let u = Url::parse_with_params( let u = Url::parse_with_params(
&format!("https://api.telegram.org/bot{}/sendMessage", telegram.token), &format!("https://api.telegram.org/bot{}/sendMessage", telegram.token),
@ -370,7 +355,7 @@ impl Auto {
} }
} }
async fn auto_cli(args: AutoArgs) -> anyhow::Result<()> { async fn auto_cli() -> anyhow::Result<()> {
let auto = { let auto = {
let db = Db::connect().await?; let db = Db::connect().await?;
let telegram = { let telegram = {
@ -385,11 +370,7 @@ async fn auto_cli(args: AutoArgs) -> anyhow::Result<()> {
} }
} }
}; };
Auto { Auto { db, telegram }
db,
telegram,
limit_n_products: args.n_products,
}
}; };
auto.inform("[auto] Empezando scrap").await; auto.inform("[auto] Empezando scrap").await;
let handles: Vec<_> = Supermercado::value_variants() let handles: Vec<_> = Supermercado::value_variants()
@ -397,7 +378,6 @@ async fn auto_cli(args: AutoArgs) -> anyhow::Result<()> {
.map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned()))) .map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned())))
.collect(); .collect();
future::try_join_all(handles).await?; future::try_join_all(handles).await?;
auto.inform("[auto] Download supermercados finished").await;
let best_selling = auto let best_selling = auto
.inform_time( .inform_time(
@ -424,7 +404,7 @@ async fn cron_cli() -> anyhow::Result<()> {
.unwrap(); .unwrap();
println!("Waiting for {:?}", t); println!("Waiting for {:?}", t);
tokio::time::sleep(t).await; tokio::time::sleep(t).await;
auto_cli(AutoArgs { n_products: None }).await.unwrap(); auto_cli().await.unwrap();
} }
} }

View file

@ -14,11 +14,8 @@
"format": "prettier --write ." "format": "prettier --write ."
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/adapter-node": "^2.0.2",
"@sveltejs/kit": "^2.0.0", "@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0", "@sveltejs/vite-plugin-svelte": "^3.0.0",
"@types/better-sqlite3": "^7.6.8",
"@types/node": "^20.10.6",
"autoprefixer": "^10.4.16", "autoprefixer": "^10.4.16",
"db-datos": "workspace:^", "db-datos": "workspace:^",
"postcss": "^8.4.32", "postcss": "^8.4.32",
@ -31,7 +28,10 @@
"tailwindcss": "^3.3.6", "tailwindcss": "^3.3.6",
"tslib": "^2.4.1", "tslib": "^2.4.1",
"typescript": "^5.0.0", "typescript": "^5.0.0",
"vite": "^5.0.3" "vite": "^5.0.3",
"@sveltejs/adapter-node": "^2.0.2",
"@types/better-sqlite3": "^7.6.8",
"@types/node": "^20.10.6"
}, },
"type": "module", "type": "module",
"dependencies": { "dependencies": {
@ -39,7 +39,6 @@
"chart.js": "^4.4.1", "chart.js": "^4.4.1",
"chartjs-adapter-dayjs-4": "^1.0.4", "chartjs-adapter-dayjs-4": "^1.0.4",
"dayjs": "^1.11.10", "dayjs": "^1.11.10",
"drizzle-orm": "^0.29.1", "drizzle-orm": "^0.29.1"
"zod": "^3.22.4"
} }
} }

View file

@ -2,7 +2,3 @@
@tailwind base; @tailwind base;
@tailwind components; @tailwind components;
@tailwind utilities; @tailwind utilities;
:root {
color-scheme: light dark;
}

View file

@ -1,19 +1,10 @@
<script lang="ts" context="module">
export type Product = { ean: string; name: string; imageUrl: string | null };
</script>
<script lang="ts"> <script lang="ts">
export let product: Product; export let product: { ean: string; name: string; imageUrl?: string | null };
</script> </script>
<a href={`/ean/${product.ean}`} class="flex gap-2"> <a href={`/ean/${product.ean}`} class="flex">
{#if product.imageUrl} {#if product.imageUrl}
<img <img src={product.imageUrl} alt={product.name} class="max-h-48" />
src={product.imageUrl}
alt={product.name}
class="max-h-48"
loading="lazy"
/>
{/if} {/if}
<p class="text-xl">{product.name}</p> <p class="text-xl">{product.name}</p>
</a> </a>

View file

@ -1,60 +1,64 @@
import type { PageData, PageServerLoad } from "./$types"; import type { PageData, PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db"; import { getDb, schema } from "$lib/server/db";
const { precios, bestSelling } = schema; const { precios } = schema;
import { desc, max, sql } from "drizzle-orm"; import { desc, sql } from "drizzle-orm";
import { import {
Supermercado, Supermercado,
hostBySupermercado, hostBySupermercado,
supermercados, supermercados,
} from "db-datos/supermercado"; } from "db-datos/supermercado";
import z from "zod";
import type { Product } from "$lib/ProductPreview.svelte";
type Data = { let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
category: string;
products: Product[];
}[];
let cache: Promise<{ key: Date; data: Data }> = doQuery();
async function doQuery() { async function doQuery() {
const db = await getDb(); const db = await getDb();
console.time("ean");
const categories = await db const eans = await db
.select({ .select({
fetchedAt: bestSelling.fetchedAt, ean: precios.ean,
category: bestSelling.category,
eansJson: bestSelling.eansJson,
}) })
.from(bestSelling) .from(precios)
.groupBy(bestSelling.category) .groupBy(precios.ean)
.having(max(bestSelling.fetchedAt)); .orderBy(sql`random()`)
.limit(50);
console.timeEnd("ean");
const categoriesWithProducts = await Promise.all( return;
categories.map(async (category) => {
const eans = z.array(z.string()).parse(JSON.parse(category.eansJson));
const products = await db const precioss = await Promise.all(
.select({ supermercados.map(
ean: precios.ean, async (
name: precios.name, supermercado,
imageUrl: precios.imageUrl, ): Promise<
}) [
.from(precios) Supermercado,
.where(sql`${precios.ean} in ${eans}`) { ean: string; name: string | null; imageUrl: string | null }[],
.groupBy(precios.ean) ]
.having(max(precios.fetchedAt)); > => {
const host = hostBySupermercado[supermercado];
return { console.time(supermercado);
category: category.category, const q = db
products: eans .select({
.map((ean) => products.find((p) => p.ean === ean)) ean: precios.ean,
.filter((x): x is Product => !!x && !!x.name), name: precios.name,
}; imageUrl: precios.imageUrl,
}), })
.from(precios)
.groupBy(precios.ean)
.having(sql`max(fetched_at)`)
.where(
sql`ean in ${eans.map((x) => x.ean)} and in_stock and url like ${`%${host}%`}`,
);
// console.debug(q.toSQL());
const res = await q;
console.timeEnd(supermercado);
return [supermercado, res];
},
),
); );
const data = { precios: precioss.flatMap(([_, r]) => r) };
return { key: new Date(), data: categoriesWithProducts }; return { key: new Date(), data };
} }
setInterval( setInterval(
@ -65,8 +69,14 @@ setInterval(
4 * 60 * 60 * 1000, 4 * 60 * 60 * 1000,
); );
type Precios = {
ean: string;
name: string | null;
imageUrl: string | null;
}[];
export const load: PageServerLoad = async ({ export const load: PageServerLoad = async ({
params, params,
}): Promise<{ data: Data }> => { }): Promise<{ precios: Precios }> => {
return { data: (await cache).data }; return (await cache).data;
}; };

View file

@ -3,27 +3,53 @@
import type { PageData } from "./$types"; import type { PageData } from "./$types";
export let data: PageData; export let data: PageData;
$: precios = data.precios.filter(
const categoryLabels: { [key in string]: string } = { (d): d is { ean: string; name: string; imageUrl: string | null } =>
almacen: "Almacen", !!d.name,
bebidas: "Bebidas", );
"frutas-y-verduras": "Frutas y Verduras", $: productos = precios.reduce(
}; (prev, curr) => [
...prev,
...(prev.find((p) => p.ean === curr.ean) ? [] : [curr]),
],
[] as { ean: string; name: string; imageUrl: string | null }[],
);
</script> </script>
{#each data.data as { category, products }} <h1 class="text-xl">WIP</h1>
<section class="my-6">
<h2 class="text-2xl font-bold"> <section>
{categoryLabels[category] ?? category} <h2 class="text-lg font-bold">Ejemplos</h2>
</h2> <ul>
<ul <li>
class="grid max-w-full grid-flow-col grid-rows-2 gap-x-8 gap-y-4 overflow-x-auto" <a href="/ean/7790070410795">
> Cookies Sabor Vainilla Con Chips De Chocolate Exquisita Paq 300 Grm
{#each products as product} </a>
<li class="w-96"> </li>
<ProductPreview {product} /> <li>
</li> <a href="/ean/7794000006911">
{/each} Sopa Instantánea KNORR QUICK Zapallo Romero Sobres 5 Un.
</ul> </a>
</section> </li>
{/each} <li>
<a href="/ean/7798062540253">Agua Saborizada Levité Pera 1,5 Lts.</a>
</li>
<li>
<a href="/ean/7790895000430">Gaseosa Coca-Cola Sabor Original 1,5 Lts.</a>
</li>
<li>
<a href="/ean/7792200000128">Bizcochos Agridulc 9 De Oro Paq 200 Grm</a>
</li>
</ul>
</section>
<section>
<h2 class="text-lg font-bold">Random</h2>
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
{#each productos as product}
<li>
<ProductPreview {product} />
</li>
{/each}
</ul>
</section>