mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 14:16:19 +00:00
Compare commits
6 commits
c53897891b
...
3b63fd0775
Author | SHA1 | Date | |
---|---|---|---|
3b63fd0775 | |||
c12348a85b | |||
ca5d0e81dc | |||
fb0d5cd7d5 | |||
b0a6640807 | |||
f6429d09bc |
8 changed files with 125 additions and 125 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -5,7 +5,7 @@ data/carrefour
|
||||||
p.*
|
p.*
|
||||||
p
|
p
|
||||||
node_modules/
|
node_modules/
|
||||||
*.db
|
*.db*
|
||||||
*.db-shm
|
*.db-shm
|
||||||
*.db-wal
|
*.db-wal
|
||||||
scraper/debug/
|
scraper/debug/
|
||||||
|
|
|
@ -42,6 +42,9 @@ importers:
|
||||||
drizzle-orm:
|
drizzle-orm:
|
||||||
specifier: ^0.29.1
|
specifier: ^0.29.1
|
||||||
version: 0.29.3(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
|
version: 0.29.3(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
|
||||||
|
zod:
|
||||||
|
specifier: ^3.22.4
|
||||||
|
version: 3.22.4
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@sveltejs/adapter-node':
|
'@sveltejs/adapter-node':
|
||||||
specifier: ^2.0.2
|
specifier: ^2.0.2
|
||||||
|
@ -2859,4 +2862,3 @@ packages:
|
||||||
|
|
||||||
/zod@3.22.4:
|
/zod@3.22.4:
|
||||||
resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==}
|
resolution: {integrity: sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==}
|
||||||
dev: true
|
|
||||||
|
|
|
@ -47,7 +47,10 @@ struct ScrapUrlArgs {
|
||||||
url: String,
|
url: String,
|
||||||
}
|
}
|
||||||
#[derive(clap::Args)]
|
#[derive(clap::Args)]
|
||||||
struct AutoArgs {}
|
struct AutoArgs {
|
||||||
|
#[arg(long)]
|
||||||
|
n_products: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
@ -59,7 +62,7 @@ async fn main() -> anyhow::Result<()> {
|
||||||
Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
|
Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
|
||||||
Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
|
Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
|
||||||
Args::ScrapBestSelling => scrap_best_selling_cli().await,
|
Args::ScrapBestSelling => scrap_best_selling_cli().await,
|
||||||
Args::Auto(_) => auto_cli().await,
|
Args::Auto(a) => auto_cli(a).await,
|
||||||
Args::Cron(_) => cron_cli().await,
|
Args::Cron(_) => cron_cli().await,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -166,7 +169,7 @@ fn build_client() -> reqwest::Client {
|
||||||
headers.append("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36".parse().unwrap());
|
headers.append("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36".parse().unwrap());
|
||||||
reqwest::ClientBuilder::default()
|
reqwest::ClientBuilder::default()
|
||||||
.timeout(Duration::from_secs(60 * 5))
|
.timeout(Duration::from_secs(60 * 5))
|
||||||
.connect_timeout(Duration::from_secs(60))
|
.connect_timeout(Duration::from_secs(30))
|
||||||
.default_headers(headers)
|
.default_headers(headers)
|
||||||
.build()
|
.build()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -176,10 +179,19 @@ pub async fn do_request(client: &reqwest::Client, url: &str) -> reqwest::Result<
|
||||||
let response = client.execute(request).await?.error_for_status()?;
|
let response = client.execute(request).await?.error_for_status()?;
|
||||||
Ok(response)
|
Ok(response)
|
||||||
}
|
}
|
||||||
|
async fn request_and_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
|
||||||
|
let res = do_request(client, url).await?;
|
||||||
|
res.text().await
|
||||||
|
}
|
||||||
|
pub async fn fetch_body(client: &reqwest::Client, url: &str) -> reqwest::Result<String> {
|
||||||
|
get_retry_policy()
|
||||||
|
.retry_if(|| request_and_body(client, url), retry_if_wasnt_not_found)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_retry_policy() -> again::RetryPolicy {
|
pub fn get_retry_policy() -> again::RetryPolicy {
|
||||||
RetryPolicy::exponential(Duration::from_millis(300))
|
RetryPolicy::exponential(Duration::from_millis(300))
|
||||||
.with_max_retries(10)
|
.with_max_retries(20)
|
||||||
.with_jitter(true)
|
.with_jitter(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,11 +204,7 @@ async fn fetch_and_parse(
|
||||||
client: &reqwest::Client,
|
client: &reqwest::Client,
|
||||||
url: String,
|
url: String,
|
||||||
) -> Result<PrecioPoint, anyhow::Error> {
|
) -> Result<PrecioPoint, anyhow::Error> {
|
||||||
let body = get_retry_policy()
|
let body = fetch_body(client, &url).await?;
|
||||||
.retry_if(|| do_request(client, &url), retry_if_wasnt_not_found)
|
|
||||||
.await?
|
|
||||||
.text()
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
let maybe_point = { scrap_url(client, url, &body).await };
|
let maybe_point = { scrap_url(client, url, &body).await };
|
||||||
|
|
||||||
|
@ -287,6 +295,7 @@ struct AutoTelegram {
|
||||||
struct Auto {
|
struct Auto {
|
||||||
db: Db,
|
db: Db,
|
||||||
telegram: Option<AutoTelegram>,
|
telegram: Option<AutoTelegram>,
|
||||||
|
limit_n_products: Option<usize>,
|
||||||
}
|
}
|
||||||
impl Auto {
|
impl Auto {
|
||||||
async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
|
async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
|
||||||
|
@ -300,7 +309,13 @@ impl Auto {
|
||||||
))
|
))
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
let links: Vec<String> = self.db.get_urls_by_domain(supermercado.host()).await?;
|
let links: Vec<String> = {
|
||||||
|
let mut links = self.db.get_urls_by_domain(supermercado.host()).await?;
|
||||||
|
if let Some(n) = self.limit_n_products {
|
||||||
|
links.truncate(n);
|
||||||
|
}
|
||||||
|
links
|
||||||
|
};
|
||||||
// {
|
// {
|
||||||
// let debug_path = PathBuf::from("debug/");
|
// let debug_path = PathBuf::from("debug/");
|
||||||
// tokio::fs::create_dir_all(&debug_path).await.unwrap();
|
// tokio::fs::create_dir_all(&debug_path).await.unwrap();
|
||||||
|
@ -340,7 +355,7 @@ impl Auto {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn inform(&self, msg: &str) {
|
async fn inform(&self, msg: &str) {
|
||||||
println!("{}", msg);
|
tracing::info!("{}", msg);
|
||||||
if let Some(telegram) = &self.telegram {
|
if let Some(telegram) = &self.telegram {
|
||||||
let u = Url::parse_with_params(
|
let u = Url::parse_with_params(
|
||||||
&format!("https://api.telegram.org/bot{}/sendMessage", telegram.token),
|
&format!("https://api.telegram.org/bot{}/sendMessage", telegram.token),
|
||||||
|
@ -355,7 +370,7 @@ impl Auto {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn auto_cli() -> anyhow::Result<()> {
|
async fn auto_cli(args: AutoArgs) -> anyhow::Result<()> {
|
||||||
let auto = {
|
let auto = {
|
||||||
let db = Db::connect().await?;
|
let db = Db::connect().await?;
|
||||||
let telegram = {
|
let telegram = {
|
||||||
|
@ -370,7 +385,11 @@ async fn auto_cli() -> anyhow::Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Auto { db, telegram }
|
Auto {
|
||||||
|
db,
|
||||||
|
telegram,
|
||||||
|
limit_n_products: args.n_products,
|
||||||
|
}
|
||||||
};
|
};
|
||||||
auto.inform("[auto] Empezando scrap").await;
|
auto.inform("[auto] Empezando scrap").await;
|
||||||
let handles: Vec<_> = Supermercado::value_variants()
|
let handles: Vec<_> = Supermercado::value_variants()
|
||||||
|
@ -378,6 +397,7 @@ async fn auto_cli() -> anyhow::Result<()> {
|
||||||
.map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned())))
|
.map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned())))
|
||||||
.collect();
|
.collect();
|
||||||
future::try_join_all(handles).await?;
|
future::try_join_all(handles).await?;
|
||||||
|
auto.inform("[auto] Download supermercados finished").await;
|
||||||
|
|
||||||
let best_selling = auto
|
let best_selling = auto
|
||||||
.inform_time(
|
.inform_time(
|
||||||
|
@ -404,7 +424,7 @@ async fn cron_cli() -> anyhow::Result<()> {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
println!("Waiting for {:?}", t);
|
println!("Waiting for {:?}", t);
|
||||||
tokio::time::sleep(t).await;
|
tokio::time::sleep(t).await;
|
||||||
auto_cli().await.unwrap();
|
auto_cli(AutoArgs { n_products: None }).await.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,8 +14,11 @@
|
||||||
"format": "prettier --write ."
|
"format": "prettier --write ."
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@sveltejs/adapter-node": "^2.0.2",
|
||||||
"@sveltejs/kit": "^2.0.0",
|
"@sveltejs/kit": "^2.0.0",
|
||||||
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
||||||
|
"@types/better-sqlite3": "^7.6.8",
|
||||||
|
"@types/node": "^20.10.6",
|
||||||
"autoprefixer": "^10.4.16",
|
"autoprefixer": "^10.4.16",
|
||||||
"db-datos": "workspace:^",
|
"db-datos": "workspace:^",
|
||||||
"postcss": "^8.4.32",
|
"postcss": "^8.4.32",
|
||||||
|
@ -28,10 +31,7 @@
|
||||||
"tailwindcss": "^3.3.6",
|
"tailwindcss": "^3.3.6",
|
||||||
"tslib": "^2.4.1",
|
"tslib": "^2.4.1",
|
||||||
"typescript": "^5.0.0",
|
"typescript": "^5.0.0",
|
||||||
"vite": "^5.0.3",
|
"vite": "^5.0.3"
|
||||||
"@sveltejs/adapter-node": "^2.0.2",
|
|
||||||
"@types/better-sqlite3": "^7.6.8",
|
|
||||||
"@types/node": "^20.10.6"
|
|
||||||
},
|
},
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
@ -39,6 +39,7 @@
|
||||||
"chart.js": "^4.4.1",
|
"chart.js": "^4.4.1",
|
||||||
"chartjs-adapter-dayjs-4": "^1.0.4",
|
"chartjs-adapter-dayjs-4": "^1.0.4",
|
||||||
"dayjs": "^1.11.10",
|
"dayjs": "^1.11.10",
|
||||||
"drizzle-orm": "^0.29.1"
|
"drizzle-orm": "^0.29.1",
|
||||||
|
"zod": "^3.22.4"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,3 +2,7 @@
|
||||||
@tailwind base;
|
@tailwind base;
|
||||||
@tailwind components;
|
@tailwind components;
|
||||||
@tailwind utilities;
|
@tailwind utilities;
|
||||||
|
|
||||||
|
:root {
|
||||||
|
color-scheme: light dark;
|
||||||
|
}
|
||||||
|
|
|
@ -1,10 +1,19 @@
|
||||||
<script lang="ts">
|
<script lang="ts" context="module">
|
||||||
export let product: { ean: string; name: string; imageUrl?: string | null };
|
export type Product = { ean: string; name: string; imageUrl: string | null };
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<a href={`/ean/${product.ean}`} class="flex">
|
<script lang="ts">
|
||||||
|
export let product: Product;
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<a href={`/ean/${product.ean}`} class="flex gap-2">
|
||||||
{#if product.imageUrl}
|
{#if product.imageUrl}
|
||||||
<img src={product.imageUrl} alt={product.name} class="max-h-48" />
|
<img
|
||||||
|
src={product.imageUrl}
|
||||||
|
alt={product.name}
|
||||||
|
class="max-h-48"
|
||||||
|
loading="lazy"
|
||||||
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
<p class="text-xl">{product.name}</p>
|
<p class="text-xl">{product.name}</p>
|
||||||
</a>
|
</a>
|
||||||
|
|
|
@ -1,64 +1,60 @@
|
||||||
import type { PageData, PageServerLoad } from "./$types";
|
import type { PageData, PageServerLoad } from "./$types";
|
||||||
import { getDb, schema } from "$lib/server/db";
|
import { getDb, schema } from "$lib/server/db";
|
||||||
const { precios } = schema;
|
const { precios, bestSelling } = schema;
|
||||||
import { desc, sql } from "drizzle-orm";
|
import { desc, max, sql } from "drizzle-orm";
|
||||||
import {
|
import {
|
||||||
Supermercado,
|
Supermercado,
|
||||||
hostBySupermercado,
|
hostBySupermercado,
|
||||||
supermercados,
|
supermercados,
|
||||||
} from "db-datos/supermercado";
|
} from "db-datos/supermercado";
|
||||||
|
import z from "zod";
|
||||||
|
import type { Product } from "$lib/ProductPreview.svelte";
|
||||||
|
|
||||||
let cache: Promise<{ key: Date; data: { precios: Precios } }> = doQuery();
|
type Data = {
|
||||||
|
category: string;
|
||||||
|
products: Product[];
|
||||||
|
}[];
|
||||||
|
|
||||||
|
let cache: Promise<{ key: Date; data: Data }> = doQuery();
|
||||||
|
|
||||||
async function doQuery() {
|
async function doQuery() {
|
||||||
const db = await getDb();
|
const db = await getDb();
|
||||||
console.time("ean");
|
|
||||||
const eans = await db
|
const categories = await db
|
||||||
.select({
|
.select({
|
||||||
ean: precios.ean,
|
fetchedAt: bestSelling.fetchedAt,
|
||||||
|
category: bestSelling.category,
|
||||||
|
eansJson: bestSelling.eansJson,
|
||||||
})
|
})
|
||||||
.from(precios)
|
.from(bestSelling)
|
||||||
.groupBy(precios.ean)
|
.groupBy(bestSelling.category)
|
||||||
.orderBy(sql`random()`)
|
.having(max(bestSelling.fetchedAt));
|
||||||
.limit(50);
|
|
||||||
console.timeEnd("ean");
|
|
||||||
|
|
||||||
return;
|
const categoriesWithProducts = await Promise.all(
|
||||||
|
categories.map(async (category) => {
|
||||||
|
const eans = z.array(z.string()).parse(JSON.parse(category.eansJson));
|
||||||
|
|
||||||
const precioss = await Promise.all(
|
const products = await db
|
||||||
supermercados.map(
|
|
||||||
async (
|
|
||||||
supermercado,
|
|
||||||
): Promise<
|
|
||||||
[
|
|
||||||
Supermercado,
|
|
||||||
{ ean: string; name: string | null; imageUrl: string | null }[],
|
|
||||||
]
|
|
||||||
> => {
|
|
||||||
const host = hostBySupermercado[supermercado];
|
|
||||||
console.time(supermercado);
|
|
||||||
const q = db
|
|
||||||
.select({
|
.select({
|
||||||
ean: precios.ean,
|
ean: precios.ean,
|
||||||
name: precios.name,
|
name: precios.name,
|
||||||
imageUrl: precios.imageUrl,
|
imageUrl: precios.imageUrl,
|
||||||
})
|
})
|
||||||
.from(precios)
|
.from(precios)
|
||||||
|
.where(sql`${precios.ean} in ${eans}`)
|
||||||
.groupBy(precios.ean)
|
.groupBy(precios.ean)
|
||||||
.having(sql`max(fetched_at)`)
|
.having(max(precios.fetchedAt));
|
||||||
.where(
|
|
||||||
sql`ean in ${eans.map((x) => x.ean)} and in_stock and url like ${`%${host}%`}`,
|
return {
|
||||||
|
category: category.category,
|
||||||
|
products: eans
|
||||||
|
.map((ean) => products.find((p) => p.ean === ean))
|
||||||
|
.filter((x): x is Product => !!x && !!x.name),
|
||||||
|
};
|
||||||
|
}),
|
||||||
);
|
);
|
||||||
// console.debug(q.toSQL());
|
|
||||||
const res = await q;
|
return { key: new Date(), data: categoriesWithProducts };
|
||||||
console.timeEnd(supermercado);
|
|
||||||
return [supermercado, res];
|
|
||||||
},
|
|
||||||
),
|
|
||||||
);
|
|
||||||
const data = { precios: precioss.flatMap(([_, r]) => r) };
|
|
||||||
return { key: new Date(), data };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
setInterval(
|
setInterval(
|
||||||
|
@ -69,14 +65,8 @@ setInterval(
|
||||||
4 * 60 * 60 * 1000,
|
4 * 60 * 60 * 1000,
|
||||||
);
|
);
|
||||||
|
|
||||||
type Precios = {
|
|
||||||
ean: string;
|
|
||||||
name: string | null;
|
|
||||||
imageUrl: string | null;
|
|
||||||
}[];
|
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({
|
export const load: PageServerLoad = async ({
|
||||||
params,
|
params,
|
||||||
}): Promise<{ precios: Precios }> => {
|
}): Promise<{ data: Data }> => {
|
||||||
return (await cache).data;
|
return { data: (await cache).data };
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,53 +3,27 @@
|
||||||
import type { PageData } from "./$types";
|
import type { PageData } from "./$types";
|
||||||
|
|
||||||
export let data: PageData;
|
export let data: PageData;
|
||||||
$: precios = data.precios.filter(
|
|
||||||
(d): d is { ean: string; name: string; imageUrl: string | null } =>
|
const categoryLabels: { [key in string]: string } = {
|
||||||
!!d.name,
|
almacen: "Almacen",
|
||||||
);
|
bebidas: "Bebidas",
|
||||||
$: productos = precios.reduce(
|
"frutas-y-verduras": "Frutas y Verduras",
|
||||||
(prev, curr) => [
|
};
|
||||||
...prev,
|
|
||||||
...(prev.find((p) => p.ean === curr.ean) ? [] : [curr]),
|
|
||||||
],
|
|
||||||
[] as { ean: string; name: string; imageUrl: string | null }[],
|
|
||||||
);
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<h1 class="text-xl">WIP</h1>
|
{#each data.data as { category, products }}
|
||||||
|
<section class="my-6">
|
||||||
<section>
|
<h2 class="text-2xl font-bold">
|
||||||
<h2 class="text-lg font-bold">Ejemplos</h2>
|
{categoryLabels[category] ?? category}
|
||||||
<ul>
|
</h2>
|
||||||
<li>
|
<ul
|
||||||
<a href="/ean/7790070410795">
|
class="grid max-w-full grid-flow-col grid-rows-2 gap-x-8 gap-y-4 overflow-x-auto"
|
||||||
Cookies Sabor Vainilla Con Chips De Chocolate Exquisita Paq 300 Grm
|
>
|
||||||
</a>
|
{#each products as product}
|
||||||
</li>
|
<li class="w-96">
|
||||||
<li>
|
|
||||||
<a href="/ean/7794000006911">
|
|
||||||
Sopa Instantánea KNORR QUICK Zapallo Romero Sobres 5 Un.
|
|
||||||
</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/ean/7798062540253">Agua Saborizada Levité Pera 1,5 Lts.</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/ean/7790895000430">Gaseosa Coca-Cola Sabor Original 1,5 Lts.</a>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<a href="/ean/7792200000128">Bizcochos Agridulc 9 De Oro Paq 200 Grm</a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section>
|
|
||||||
<h2 class="text-lg font-bold">Random</h2>
|
|
||||||
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
|
|
||||||
{#each productos as product}
|
|
||||||
<li>
|
|
||||||
<ProductPreview {product} />
|
<ProductPreview {product} />
|
||||||
</li>
|
</li>
|
||||||
{/each}
|
{/each}
|
||||||
</ul>
|
</ul>
|
||||||
</section>
|
</section>
|
||||||
|
{/each}
|
||||||
|
|
Loading…
Reference in a new issue