mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 14:16:19 +00:00
Compare commits
6 commits
c53897891b
...
3b63fd0775
Author | SHA1 | Date | |
---|---|---|---|
3b63fd0775 | |||
c12348a85b | |||
ca5d0e81dc | |||
fb0d5cd7d5 | |||
b0a6640807 | |||
f6429d09bc |
8 changed files with 125 additions and 125 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -5,7 +5,7 @@ data/carrefour
|
|||
p.*
|
||||
p
|
||||
node_modules/
|
||||
*.db
|
||||
*.db*
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
scraper/debug/
|
||||
|
|
|
@ -42,6 +42,9 @@ importers:
|
|||
drizzle-orm:
|
||||
specifier: ^0.29.1
|
||||
version: 0.29.3(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
|
||||
zod:
|
||||
specifier: ^3.22.4
|
||||
version: 3.22.4
|
||||
devDependencies:
|
||||
'@sveltejs/adapter-node':
|
||||
specifier: ^2.0.2
|
||||
|
@ -2859,4 +2862,3 @@ packages:
|
|||
|
||||
/zod@3.22.4:
|
||||
resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==}
|
||||
dev: true
|
||||
|
|
|
@ -47,7 +47,10 @@ struct ScrapUrlArgs {
|
|||
url: String,
|
||||
}
|
||||
#[derive(clap::Args)]
|
||||
struct AutoArgs {}
|
||||
struct AutoArgs {
|
||||
#[arg(long)]
|
||||
n_products: Option<usize>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
|
@ -59,7 +62,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
|
||||
Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
|
||||
Args::ScrapBestSelling => scrap_best_selling_cli().await,
|
||||
Args::Auto(_) => auto_cli().await,
|
||||
Args::Auto(a) => auto_cli(a).await,
|
||||
Args::Cron(_) => cron_cli().await,
|
||||
}
|
||||
}
|
||||
|
@ -166,7 +169,7 @@ fn build_client() -> reqwest::Client {
|
|||
headers.append("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36".parse().unwrap());
|
||||
reqwest::ClientBuilder::default()
|
||||
.timeout(Duration::from_secs(60 * 5))
|
||||
.connect_timeout(Duration::from_secs(60))
|
||||
.connect_timeout(Duration::from_secs(30))
|
||||
.default_headers(headers)
|
||||
.build()
|
||||
.unwrap()
|
||||
|
@ -176,10 +179,19 @@ pub async fn do_request(client: &reqwest::Client, url: &str) -> reqwest::Result<
|
|||
let response = client.execute(request).await?.error_for_status()?;
|
||||
Ok(response)
|
||||
}
|
||||
async fn request_and_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
|
||||
let res = do_request(client, url).await?;
|
||||
res.text().await
|
||||
}
|
||||
pub async fn fetch_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
|
||||
get_retry_policy()
|
||||
.retry_if(|| request_and_body(client, url), retry_if_wasnt_not_found)
|
||||
.await
|
||||
}
|
||||
|
||||
pub fn get_retry_policy() -> again::RetryPolicy {
|
||||
RetryPolicy::exponential(Duration::from_millis(300))
|
||||
.with_max_retries(10)
|
||||
.with_max_retries(20)
|
||||
.with_jitter(true)
|
||||
}
|
||||
|
||||
|
@ -192,11 +204,7 @@ async fn fetch_and_parse(
|
|||
client: &reqwest::Client,
|
||||
url: String,
|
||||
) -> Result<PrecioPoint, anyhow::Error> {
|
||||
let body = get_retry_policy()
|
||||
.retry_if(|| do_request(client, &url), retry_if_wasnt_not_found)
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
let body = fetch_body(client, &url).await?;
|
||||
|
||||
let maybe_point = { scrap_url(client, url, &body).await };
|
||||
|
||||
|
@ -287,6 +295,7 @@ struct AutoTelegram {
|
|||
struct Auto {
|
||||
db: Db,
|
||||
telegram: Option<AutoTelegram>,
|
||||
limit_n_products: Option<usize>,
|
||||
}
|
||||
impl Auto {
|
||||
async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
|
||||
|
@ -300,7 +309,13 @@ impl Auto {
|
|||
))
|
||||
.await;
|
||||
}
|
||||
let links: Vec<String> = self.db.get_urls_by_domain(supermercado.host()).await?;
|
||||
let links: Vec<String> = {
|
||||
let mut links = self.db.get_urls_by_domain(supermercado.host()).await?;
|
||||
if let Some(n) = self.limit_n_products {
|
||||
links.truncate(n);
|
||||
}
|
||||
links
|
||||
};
|
||||
// {
|
||||
// let debug_path = PathBuf::from("debug/");
|
||||
// tokio::fs::create_dir_all(&debug_path).await.unwrap();
|
||||
|
@ -340,7 +355,7 @@ impl Auto {
|
|||
}
|
||||
|
||||
async fn inform(&self, msg: &str) {
|
||||
println!("{}", msg);
|
||||
tracing::info!("{}", msg);
|
||||
if let Some(telegram) = &self.telegram {
|
||||
let u = Url::parse_with_params(
|
||||
&format!("https://api.telegram.org/bot{}/sendMessage", telegram.token),
|
||||
|
@ -355,7 +370,7 @@ impl Auto {
|
|||
}
|
||||
}
|
||||
|
||||
async fn auto_cli() -> anyhow::Result<()> {
|
||||
async fn auto_cli(args: AutoArgs) -> anyhow::Result<()> {
|
||||
let auto = {
|
||||
let db = Db::connect().await?;
|
||||
let telegram = {
|
||||
|
@ -370,7 +385,11 @@ async fn auto_cli() -> anyhow::Result<()> {
|
|||
}
|
||||
}
|
||||
};
|
||||
Auto { db, telegram }
|
||||
Auto {
|
||||
db,
|
||||
telegram,
|
||||
limit_n_products: args.n_products,
|
||||
}
|
||||
};
|
||||
auto.inform("[auto] Empezando scrap").await;
|
||||
let handles: Vec<_> = Supermercado::value_variants()
|
||||
|
@ -378,6 +397,7 @@ async fn auto_cli() -> anyhow::Result<()> {
|
|||
.map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned())))
|
||||
.collect();
|
||||
future::try_join_all(handles).await?;
|
||||
auto.inform("[auto] Download supermercados finished").await;
|
||||
|
||||
let best_selling = auto
|
||||
.inform_time(
|
||||
|
@ -404,7 +424,7 @@ async fn cron_cli() -> anyhow::Result<()> {
|
|||
.unwrap();
|
||||
println!("Waiting for {:?}", t);
|
||||
tokio::time::sleep(t).await;
|
||||
auto_cli().await.unwrap();
|
||||
auto_cli(AutoArgs { n_products: None }).await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,8 +14,11 @@
|
|||
"format": "prettier --write ."
|
||||
},
|
||||
"devDependencies": {
|
||||
"@sveltejs/adapter-node": "^2.0.2",
|
||||
"@sveltejs/kit": "^2.0.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
||||
"@types/better-sqlite3": "^7.6.8",
|
||||
"@types/node": "^20.10.6",
|
||||
"autoprefixer": "^10.4.16",
|
||||
"db-datos": "workspace:^",
|
||||
"postcss": "^8.4.32",
|
||||
|
@ -28,10 +31,7 @@
|
|||
"tailwindcss": "^3.3.6",
|
||||
"tslib": "^2.4.1",
|
||||
"typescript": "^5.0.0",
|
||||
"vite": "^5.0.3",
|
||||
"@sveltejs/adapter-node": "^2.0.2",
|
||||
"@types/better-sqlite3": "^7.6.8",
|
||||
"@types/node": "^20.10.6"
|
||||
"vite": "^5.0.3"
|
||||
},
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
|
@ -39,6 +39,7 @@
|
|||
"chart.js": "^4.4.1",
|
||||
"chartjs-adapter-dayjs-4": "^1.0.4",
|
||||
"dayjs": "^1.11.10",
|
||||
"drizzle-orm": "^0.29.1"
|
||||
"drizzle-orm": "^0.29.1",
|
||||
"zod": "^3.22.4"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,3 +2,7 @@
|
|||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
:root {
|
||||
color-scheme: light dark;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,19 @@
|
|||
<script lang="ts">
|
||||
export let product: { ean: string; name: string; imageUrl?: string | null };
|
||||
<script lang="ts" context="module">
|
||||
export type Product = { ean: string; name: string; imageUrl: string | null };
|
||||
</script>
|
||||
|
||||
<a href={`/ean/${product.ean}`} class="flex">
|
||||
<script lang="ts">
|
||||
export let product: Product;
|
||||
</script>
|
||||
|
||||
<a href={`/ean/${product.ean}`} class="flex gap-2">
|
||||
{#if product.imageUrl}
|
||||
<img src={product.imageUrl} alt={product.name} class="max-h-48" />
|
||||
<img
|
||||
src={product.imageUrl}
|
||||
alt={product.name}
|
||||
class="max-h-48"
|
||||
loading="lazy"
|
||||
/>
|
||||
{/if}
|
||||
<p class="text-xl">{product.name}</p>
|
||||
</a>
|
||||
|
|
|
@ -1,64 +1,60 @@
|
|||
import type { PageData, PageServerLoad } from "./$types";
|
||||
import { getDb, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
import { desc, sql } from "drizzle-orm";
|
||||
const { precios, bestSelling } = schema;
|
||||
import { desc, max, sql } from "drizzle-orm";
|
||||
import {
|
||||
Supermercado,
|
||||
hostBySupermercado,
|
||||
supermercados,
|
||||
} from "db-datos/supermercado";
|
||||
import z from "zod";
|
||||
import type { Product } from "$lib/ProductPreview.svelte";
|
||||
|
||||
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
|
||||
type Data = {
|
||||
category: string;
|
||||
products: Product[];
|
||||
}[];
|
||||
|
||||
let cache: Promise<{ key: Date; data: Data }> = doQuery();
|
||||
|
||||
async function doQuery() {
|
||||
const db = await getDb();
|
||||
console.time("ean");
|
||||
const eans = await db
|
||||
|
||||
const categories = await db
|
||||
.select({
|
||||
ean: precios.ean,
|
||||
fetchedAt: bestSelling.fetchedAt,
|
||||
category: bestSelling.category,
|
||||
eansJson: bestSelling.eansJson,
|
||||
})
|
||||
.from(precios)
|
||||
.groupBy(precios.ean)
|
||||
.orderBy(sql`random()`)
|
||||
.limit(50);
|
||||
console.timeEnd("ean");
|
||||
.from(bestSelling)
|
||||
.groupBy(bestSelling.category)
|
||||
.having(max(bestSelling.fetchedAt));
|
||||
|
||||
return;
|
||||
const categoriesWithProducts = await Promise.all(
|
||||
categories.map(async (category) => {
|
||||
const eans = z.array(z.string()).parse(JSON.parse(category.eansJson));
|
||||
|
||||
const precioss = await Promise.all(
|
||||
supermercados.map(
|
||||
async (
|
||||
supermercado,
|
||||
): Promise<
|
||||
[
|
||||
Supermercado,
|
||||
{ ean: string; name: string | null; imageUrl: string | null }[],
|
||||
]
|
||||
> => {
|
||||
const host = hostBySupermercado[supermercado];
|
||||
console.time(supermercado);
|
||||
const q = db
|
||||
.select({
|
||||
ean: precios.ean,
|
||||
name: precios.name,
|
||||
imageUrl: precios.imageUrl,
|
||||
})
|
||||
.from(precios)
|
||||
.groupBy(precios.ean)
|
||||
.having(sql`max(fetched_at)`)
|
||||
.where(
|
||||
sql`ean in ${eans.map((x) => x.ean)} and in_stock and url like ${`%${host}%`}`,
|
||||
);
|
||||
// console.debug(q.toSQL());
|
||||
const res = await q;
|
||||
console.timeEnd(supermercado);
|
||||
return [supermercado, res];
|
||||
},
|
||||
),
|
||||
const products = await db
|
||||
.select({
|
||||
ean: precios.ean,
|
||||
name: precios.name,
|
||||
imageUrl: precios.imageUrl,
|
||||
})
|
||||
.from(precios)
|
||||
.where(sql`${precios.ean} in ${eans}`)
|
||||
.groupBy(precios.ean)
|
||||
.having(max(precios.fetchedAt));
|
||||
|
||||
return {
|
||||
category: category.category,
|
||||
products: eans
|
||||
.map((ean) => products.find((p) => p.ean === ean))
|
||||
.filter((x): x is Product => !!x && !!x.name),
|
||||
};
|
||||
}),
|
||||
);
|
||||
const data = { precios: precioss.flatMap(([_, r]) => r) };
|
||||
return { key: new Date(), data };
|
||||
|
||||
return { key: new Date(), data: categoriesWithProducts };
|
||||
}
|
||||
|
||||
setInterval(
|
||||
|
@ -69,14 +65,8 @@ setInterval(
|
|||
4 * 60 * 60 * 1000,
|
||||
);
|
||||
|
||||
type Precios = {
|
||||
ean: string;
|
||||
name: string | null;
|
||||
imageUrl: string | null;
|
||||
}[];
|
||||
|
||||
export const load: PageServerLoad = async ({
|
||||
params,
|
||||
}): Promise<{ precios: Precios }> => {
|
||||
return (await cache).data;
|
||||
}): Promise<{ data: Data }> => {
|
||||
return { data: (await cache).data };
|
||||
};
|
||||
|
|
|
@ -3,53 +3,27 @@
|
|||
import type { PageData } from "./$types";
|
||||
|
||||
export let data: PageData;
|
||||
$: precios = data.precios.filter(
|
||||
(d): d is { ean: string; name: string; imageUrl: string | null } =>
|
||||
!!d.name,
|
||||
);
|
||||
$: productos = precios.reduce(
|
||||
(prev, curr) => [
|
||||
...prev,
|
||||
...(prev.find((p) => p.ean === curr.ean) ? [] : [curr]),
|
||||
],
|
||||
[] as { ean: string; name: string; imageUrl: string | null }[],
|
||||
);
|
||||
|
||||
const categoryLabels: { [key in string]: string } = {
|
||||
almacen: "Almacen",
|
||||
bebidas: "Bebidas",
|
||||
"frutas-y-verduras": "Frutas y Verduras",
|
||||
};
|
||||
</script>
|
||||
|
||||
<h1 class="text-xl">WIP</h1>
|
||||
|
||||
<section>
|
||||
<h2 class="text-lg font-bold">Ejemplos</h2>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/ean/7790070410795">
|
||||
Cookies Sabor Vainilla Con Chips De Chocolate Exquisita Paq 300 Grm
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/ean/7794000006911">
|
||||
Sopa Instantánea KNORR QUICK Zapallo Romero Sobres 5 Un.
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/ean/7798062540253">Agua Saborizada Levité Pera 1,5 Lts.</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/ean/7790895000430">Gaseosa Coca-Cola Sabor Original 1,5 Lts.</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/ean/7792200000128">Bizcochos Agridulc 9 De Oro Paq 200 Grm</a>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2 class="text-lg font-bold">Random</h2>
|
||||
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
|
||||
{#each productos as product}
|
||||
<li>
|
||||
<ProductPreview {product} />
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
</section>
|
||||
{#each data.data as { category, products }}
|
||||
<section class="my-6">
|
||||
<h2 class="text-2xl font-bold">
|
||||
{categoryLabels[category] ?? category}
|
||||
</h2>
|
||||
<ul
|
||||
class="grid max-w-full grid-flow-col grid-rows-2 gap-x-8 gap-y-4 overflow-x-auto"
|
||||
>
|
||||
{#each products as product}
|
||||
<li class="w-96">
|
||||
<ProductPreview {product} />
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
</section>
|
||||
{/each}
|
||||
|
|
Loading…
Reference in a new issue