Compare commits

...

8 commits

Author SHA1 Message Date
9fdc384005
Merge d87fa5f905 into 6d32c897ac 2024-08-04 15:31:57 -03:00
d87fa5f905 borrar db de sitio 2024-08-04 15:31:36 -03:00
76db90618c search 2024-08-04 15:30:59 -03:00
a3bdc59b73 count 2024-08-04 14:48:54 -03:00
d38b2a8cb0 history 2024-08-04 14:38:59 -03:00
4bf1351688 fix: usar env para API HOST 2024-08-04 13:03:52 -03:00
8d9fce5293 fix types 2024-08-04 12:57:01 -03:00
6d32c897ac scraper: shuffle links
to get other links if we start getting blocked
2024-08-02 16:25:14 -03:00
20 changed files with 210 additions and 60 deletions

2
.gitignore vendored
View file

@ -3,7 +3,7 @@ node_modules/
*.db-shm *.db-shm
*.db-wal *.db-wal
target/ target/
.env.* *.local
.DS_Store .DS_Store

View file

@ -0,0 +1 @@
DB_PATH=../sqlite.db

View file

@ -42,6 +42,9 @@ importers:
drizzle-orm: drizzle-orm:
specifier: ^0.32.0 specifier: ^0.32.0
version: 0.32.0(@types/better-sqlite3@7.6.9)(better-sqlite3@11.1.2) version: 0.32.0(@types/better-sqlite3@7.6.9)(better-sqlite3@11.1.2)
ky:
specifier: ^1.5.0
version: 1.5.0
zod: zod:
specifier: ^3.22.4 specifier: ^3.22.4
version: 3.22.4 version: 3.22.4
@ -1224,6 +1227,10 @@ packages:
resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==} resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==}
engines: {node: '>=6'} engines: {node: '>=6'}
ky@1.5.0:
resolution: {integrity: sha512-bkQo+UqryW6Zmo/DsixYZE4Z9t2mzvNMhceyIhuMuInb3knm5Q+GNGMKveydJAj+Z6piN1SwI6eR/V0G+Z0BtA==}
engines: {node: '>=18'}
lilconfig@2.1.0: lilconfig@2.1.0:
resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==} resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==}
engines: {node: '>=10'} engines: {node: '>=10'}
@ -2675,6 +2682,8 @@ snapshots:
kleur@4.1.5: {} kleur@4.1.5: {}
ky@1.5.0: {}
lilconfig@2.1.0: {} lilconfig@2.1.0: {}
lilconfig@3.1.1: {} lilconfig@3.1.1: {}

1
rust/Cargo.lock generated
View file

@ -353,6 +353,7 @@ dependencies = [
"iana-time-zone", "iana-time-zone",
"js-sys", "js-sys",
"num-traits", "num-traits",
"serde",
"wasm-bindgen", "wasm-bindgen",
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]

View file

@ -9,7 +9,7 @@ edition = "2021"
again = "0.1.2" again = "0.1.2"
anyhow = "1.0.79" anyhow = "1.0.79"
base64 = "0.21.7" base64 = "0.21.7"
chrono = "0.4" chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4.15", features = ["derive"] } clap = { version = "4.4.15", features = ["derive"] }
cron = "0.12.0" cron = "0.12.0"
sqlx = { version = "0.8", features = [ "runtime-tokio", "sqlite", "chrono", "json" ] } sqlx = { version = "0.8", features = [ "runtime-tokio", "sqlite", "chrono", "json" ] }

View file

@ -1,4 +1,11 @@
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Json, Router}; use axum::{
extract::{Path, State},
http::StatusCode,
response::IntoResponse,
routing::get,
Json, Router,
};
use chrono::{DateTime, Utc};
use clap::ValueEnum; use clap::ValueEnum;
use futures::future::join_all; use futures::future::join_all;
use itertools::Itertools; use itertools::Itertools;
@ -168,6 +175,104 @@ async fn get_best_selling(State(pool): State<SqlitePool>) -> impl IntoResponse {
Json(categories_with_products) Json(categories_with_products)
} }
async fn get_product_history(
State(pool): State<SqlitePool>,
Path(ean): Path<String>,
) -> impl IntoResponse {
#[derive(sqlx::FromRow, Debug, Serialize)]
struct Precio {
ean: String,
fetched_at: chrono::DateTime<Utc>,
precio_centavos: Option<i64>,
in_stock: Option<bool>,
url: String,
name: Option<String>,
image_url: Option<String>,
}
let precios = sqlx::query!(
"
select ean,fetched_at,precio_centavos,in_stock,url,name,image_url from precios
where ean = ?
order by fetched_at
",
ean
)
.map(|r| Precio {
ean: r.ean,
url: r.url,
fetched_at: DateTime::from_timestamp(r.fetched_at, 0).unwrap(),
image_url: r.image_url,
name: r.name,
in_stock: r.in_stock.map(|x| x == 1),
precio_centavos: r.precio_centavos,
})
.fetch_all(&pool)
.await
.unwrap();
Json(precios)
}
async fn search(State(pool): State<SqlitePool>, Path(query): Path<String>) -> impl IntoResponse {
let sql_query = query
.clone()
.replace("\"", "\"\"")
.split(" ")
.map(|x| format!("\"{}\"", x))
.join(" ");
#[derive(Serialize)]
struct Result {
ean: String,
name: String,
image_url: String,
}
let results = sqlx::query!(
"with search_results as (
select f.ean from precios_fts f
where f.name match ? and f.ean != ''
group by f.ean
limit 100
)
select p.id, p.ean, p.name, p.image_url from search_results as s
join precios as p
on p.ean = s.ean
where p.fetched_at = (
SELECT MAX(fetched_at)
FROM precios as pf
WHERE pf.ean = s.ean and pf.name is not null
);",
sql_query
)
.fetch_all(&pool)
.await
.unwrap()
.into_iter()
.map(|r| Result {
ean: r.ean,
image_url: r.image_url.unwrap(),
name: r.name.unwrap(),
})
.collect_vec();
Json(results)
}
async fn get_info(State(pool): State<SqlitePool>) -> impl IntoResponse {
#[derive(Serialize)]
struct Info {
count: i64,
}
let count = sqlx::query!("select count(distinct ean) as count from precios")
.fetch_one(&pool)
.await
.unwrap()
.count;
Json(Info { count })
}
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
@ -205,6 +310,9 @@ async fn main() {
.route("/", get(index)) .route("/", get(index))
.route("/api/healthcheck", get(healthcheck)) .route("/api/healthcheck", get(healthcheck))
.route("/api/0/best-selling-products", get(get_best_selling)) .route("/api/0/best-selling-products", get(get_best_selling))
.route("/api/0/ean/:ean/history", get(get_product_history))
.route("/api/0/info", get(get_info))
.route("/api/0/search/:query", get(search))
.with_state(pool); .with_state(pool);
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap(); let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();

View file

@ -6,6 +6,8 @@ use crate::db::Db;
use crate::scraper::Scraper; use crate::scraper::Scraper;
use futures::Future; use futures::Future;
use preciazo::supermercado::Supermercado; use preciazo::supermercado::Supermercado;
use rand::seq::SliceRandom;
use rand::thread_rng;
use reqwest::Url; use reqwest::Url;
#[derive(Clone)] #[derive(Clone)]
@ -48,6 +50,7 @@ impl Auto {
if let Some(n) = self.args.n_products { if let Some(n) = self.args.n_products {
links.truncate(n); links.truncate(n);
} }
links.shuffle(&mut thread_rng());
links links
}; };
// { // {

2
sitio/.env.development Normal file
View file

@ -0,0 +1,2 @@
DB_PATH=../sqlite.db
VITE_API_HOST=http://localhost:8000

2
sitio/.gitignore vendored
View file

@ -4,7 +4,7 @@ node_modules
/.svelte-kit /.svelte-kit
/package /package
.env .env
.env.* *.local
!.env.example !.env.example
vite.config.js.timestamp-* vite.config.js.timestamp-*
vite.config.ts.timestamp-* vite.config.ts.timestamp-*

View file

@ -40,6 +40,7 @@
"chartjs-adapter-dayjs-4": "^1.0.4", "chartjs-adapter-dayjs-4": "^1.0.4",
"dayjs": "^1.11.10", "dayjs": "^1.11.10",
"drizzle-orm": "^0.32.0", "drizzle-orm": "^0.32.0",
"ky": "^1.5.0",
"zod": "^3.22.4" "zod": "^3.22.4"
}, },
"packageManager": "pnpm@9.5.0+sha512.140036830124618d624a2187b50d04289d5a087f326c9edfc0ccd733d76c4f52c3a313d4fc148794a2a9d81553016004e6742e8cf850670268a7387fc220c903" "packageManager": "pnpm@9.5.0+sha512.140036830124618d624a2187b50d04289d5a087f326c9edfc0ccd733d76c4f52c3a313d4fc148794a2a9d81553016004e6742e8cf850670268a7387fc220c903"

View file

@ -1,5 +1,9 @@
<script lang="ts" context="module"> <script lang="ts" context="module">
export type Product = { ean: string; name: string; image_url: string | null }; export type Product = {
ean: string;
name: string | null;
image_url: string | null;
};
</script> </script>
<script lang="ts"> <script lang="ts">

View file

@ -1 +1,2 @@
// place files you want to import through the `$lib` alias in this folder. // place files you want to import through the `$lib` alias in this folder.
export const API_HOST = import.meta.env.VITE_API_HOST;

View file

@ -1,2 +0,0 @@
export { getDb } from "db-datos/db.js";
export * as schema from "db-datos/schema.js";

View file

@ -1,15 +1,17 @@
import { countDistinct } from "drizzle-orm";
import type { PageServerLoad } from "./$types"; import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db"; import { z } from "zod";
const { precios } = schema; import ky from "ky";
import { API_HOST } from "$lib";
/**
 * Fetches `/api/0/info` from the API host and validates that the response
 * is an object with a numeric `count`.
 */
async function getInfo() {
const zInfo = z.object({ count: z.number() });
const payload = await ky.get(`${API_HOST}/api/0/info`).json();
return zInfo.parse(payload);
}
export const load: PageServerLoad = async () => { export const load: PageServerLoad = async () => {
const db = await getDb(); const nProductos = (await getInfo()).count;
const nProductosR = await db
.select({
count: countDistinct(precios.ean),
})
.from(precios);
const nProductos = nProductosR[0].count;
return { nProductos }; return { nProductos };
}; };

View file

@ -2,7 +2,9 @@ import type { PageServerLoad } from "./$types";
import z from "zod"; import z from "zod";
async function getBestSelling() { async function getBestSelling() {
const res = await fetch("http://localhost:8000/api/0/best-selling-products"); const res = await fetch(
`${import.meta.env.VITE_API_HOST}/api/0/best-selling-products`,
);
const json = await res.json(); const json = await res.json();
return z return z
.array( .array(

View file

@ -1,20 +1,23 @@
import { error } from "@sveltejs/kit"; import { error } from "@sveltejs/kit";
import { eq } from "drizzle-orm";
import type { PageServerLoad } from "./$types"; import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db"; import { z } from "zod";
const { precios } = schema; import { zPrecio, type Precio } from "./common";
import { API_HOST } from "$lib";
/**
 * Fetches the price history for `ean` from the API and validates it against
 * `zPrecio`.
 *
 * The EAN comes from the route params, so it is percent-encoded before being
 * placed in the URL path. A non-2xx response throws with the HTTP status
 * instead of surfacing later as a JSON or schema parse error.
 */
async function getProductHistory(ean: string) {
const res = await fetch(
`${API_HOST}/api/0/ean/${encodeURIComponent(ean)}/history`,
);
if (!res.ok) {
throw new Error(
`history request for EAN ${ean} failed with HTTP ${res.status}`,
);
}
const json = await res.json();
return z.array(zPrecio).parse(json);
}
export const load: PageServerLoad = async ({ params }) => { export const load: PageServerLoad = async ({ params }) => {
const db = await getDb(); const res = await getProductHistory(params.ean);
const q = db
.select()
.from(precios)
.where(eq(precios.ean, params.ean))
.orderBy(precios.fetchedAt);
const res = await q;
if (res.length === 0) return error(404, "Not Found"); if (res.length === 0) return error(404, "Not Found");
const meta = res.findLast((p) => p.name); const meta = res.findLast(
(p): p is Precio & { name: string; image_url: string } =>
!!(p.name && p.image_url),
);
return { precios: res, meta }; return { precios: res, meta };
}; };

View file

@ -1,18 +1,18 @@
<script lang="ts"> <script lang="ts">
import { Supermercado, hosts } from "db-datos/supermercado"; import { Supermercado, hosts } from "db-datos/supermercado";
import * as schema from "db-datos/schema";
import type { PageData } from "./$types"; import type { PageData } from "./$types";
import Chart from "./Chart.svelte"; import Chart from "./Chart.svelte";
import type { Precio } from "./common";
export let data: PageData; export let data: PageData;
let urls: Map<Supermercado, schema.Precio>; let urls: Map<Supermercado, Precio>;
$: urls = data.precios.reduce((prev, curr) => { $: urls = data.precios.reduce((prev, curr) => {
const url = new URL(curr.url); const url = new URL(curr.url);
const supermercado = hosts[url.hostname]; const supermercado = hosts[url.hostname];
prev.set(supermercado, curr); prev.set(supermercado, curr);
return prev; return prev;
}, new Map<Supermercado, schema.Precio>()); }, new Map<Supermercado, Precio>());
const classBySupermercado: { [supermercado in Supermercado]: string } = { const classBySupermercado: { [supermercado in Supermercado]: string } = {
[Supermercado.Dia]: "bg-[#d52b1e] focus:ring-[#d52b1e]", [Supermercado.Dia]: "bg-[#d52b1e] focus:ring-[#d52b1e]",
@ -30,18 +30,18 @@
{#if data.meta} {#if data.meta}
<h1 class="text-3xl font-bold">{data.meta.name}</h1> <h1 class="text-3xl font-bold">{data.meta.name}</h1>
<img src={data.meta.imageUrl} alt={data.meta.name} class="max-h-48" /> <img src={data.meta.image_url} alt={data.meta.name} class="max-h-48" />
<div class="flex gap-2"> <div class="flex gap-2">
{#each urls as [supermercado, { url, precioCentavos }]} {#each urls as [supermercado, { url, precio_centavos }]}
<a <a
href={url} href={url}
rel="noreferrer noopener" rel="noreferrer noopener"
target="_blank" target="_blank"
class={`focus:shadow-outline inline-flex flex-col items-center justify-center rounded-md ${classBySupermercado[supermercado]} px-4 py-2 font-medium tracking-wide text-white transition-colors duration-200 hover:bg-opacity-80 focus:outline-none focus:ring-2 focus:ring-offset-2`} class={`focus:shadow-outline inline-flex flex-col items-center justify-center rounded-md ${classBySupermercado[supermercado]} px-4 py-2 font-medium tracking-wide text-white transition-colors duration-200 hover:bg-opacity-80 focus:outline-none focus:ring-2 focus:ring-offset-2`}
> >
{#if precioCentavos} {#if precio_centavos}
<span class="text-lg font-bold" <span class="text-lg font-bold"
>{formatter.format(precioCentavos / 100)}</span >{formatter.format(precio_centavos / 100)}</span
> >
{/if} {/if}
<span class="text-sm">{supermercado}</span> <span class="text-sm">{supermercado}</span>

View file

@ -1,8 +1,8 @@
<script lang="ts"> <script lang="ts">
import type { Precio } from "db-datos/schema";
// import dayjs from "dayjs"; // import dayjs from "dayjs";
import ChartJs from "./ChartJs.svelte"; import ChartJs from "./ChartJs.svelte";
import { hosts, colorBySupermercado } from "db-datos/supermercado"; import { hosts, colorBySupermercado } from "db-datos/supermercado";
import type { Precio } from "./common";
export let precios: Precio[]; export let precios: Precio[];
@ -15,15 +15,15 @@
const ps = precios const ps = precios
.filter((p) => new URL(p.url!).hostname === host) .filter((p) => new URL(p.url!).hostname === host)
.filter( .filter(
(p): p is Precio & { precioCentavos: number } => (p): p is Precio & { precio_centavos: number } =>
p.precioCentavos !== null, p.precio_centavos !== null,
); );
return { return {
label: supermercado, label: supermercado,
data: [ data: [
...ps.map((p) => ({ ...ps.map((p) => ({
x: p.fetchedAt, x: p.fetched_at,
y: p.precioCentavos / 100, y: p.precio_centavos / 100,
})), })),
// lie // lie
// ...ps.map((p) => ({ // ...ps.map((p) => ({

View file

@ -0,0 +1,12 @@
import { z } from "zod";
// Schema for one price record as returned by the history endpoint.
// Field names are snake_case, matching the keys in the API's JSON.
export const zPrecio = z.object({
ean: z.string(),
// Coerced into a Date by zod, so consumers receive a Date object.
fetched_at: z.coerce.date(),
// The remaining nullable fields may be null in the API response.
precio_centavos: z.number().nullable(),
in_stock: z.boolean().nullable(),
url: z.string(),
name: z.string().nullable(),
image_url: z.string().nullable(),
});
// Type of a parsed price record, derived from the schema above.
export type Precio = z.infer<typeof zPrecio>;

View file

@ -1,26 +1,29 @@
import { sql } from "drizzle-orm"; import { z } from "zod";
import type { PageServerLoad } from "./$types"; import type { PageServerLoad } from "./$types";
import { getDb } from "$lib/server/db"; import { API_HOST } from "$lib";
import ky from "ky";
// Schema for one search result. Unlike a price record, `name` and
// `image_url` must be non-null strings here, or parsing fails.
const zProductResult = z.object({
ean: z.string(),
name: z.string(),
image_url: z.string(),
});
/**
 * Calls the API's search endpoint with `query` (percent-encoded into the
 * URL path) and validates the response as an array of `zProductResult`.
 */
async function search(query: string) {
const endpoint = `${API_HOST}/api/0/search/${encodeURIComponent(query)}`;
const payload = await ky.get(endpoint).json();
return z.array(zProductResult).parse(payload);
}
export const load: PageServerLoad = async ({ url }) => { export const load: PageServerLoad = async ({ url }) => {
const db = await getDb();
const query = url.searchParams.get("q"); const query = url.searchParams.get("q");
let results: null | { ean: string; name: string; imageUrl: string }[] = null; let results: null | { ean: string; name: string; image_url: string }[] = query
if (query) { ? await search(query)
const sQuery = query : null;
.replaceAll(`"`, `""`)
.split(" ")
.map((s) => `"${s}"`)
.join(" ");
console.debug(sQuery);
const sqlQuery = sql`select p.ean, p.name, p.image_url as imageUrl from precios_fts f
join precios p on p.ean = f.ean
where f.name match ${sQuery}
group by p.ean
having max(p.fetched_at)
order by p.in_stock desc;`;
results = db.all(sqlQuery);
}
return { query, results }; return { query, results };
}; };