Compare commits

...

19 commits

Author SHA1 Message Date
5dcc901a80 coto: parsear la imagen correcta
si no, se captura la imagen de "OFERTA!"
2023-12-30 00:08:00 -03:00
f8cdbae1b3 usar la misma version de drizzle-orm para todo 2023-12-30 00:04:31 -03:00
d59b4283bc product preview en index 2023-12-30 00:02:02 -03:00
f20203cac8 productpreview en search 2023-12-29 23:59:45 -03:00
df5b6f3bf8 titulo en página de búsqueda 2023-12-29 23:51:58 -03:00
4d32ea99f3 modo oscuro básico 2023-12-29 23:50:33 -03:00
c4e626dbc3 scraper: script para pushear container 2023-12-29 22:59:17 -03:00
73759ae6d9 carrefour url scraper 2023-12-29 22:58:37 -03:00
2bac37df53 borrar undici 2023-12-29 22:54:11 -03:00
79a3a6a94a usar node para sitio
bun anda muy mal con sveltekit
2023-12-29 22:51:38 -03:00
48b3f3e436 usar alt en img 2023-12-29 22:49:23 -03:00
98a699e454 scrapear urls a BD 2023-12-29 21:49:32 -03:00
de3bf4900c coto: trimmear titulo 2023-12-29 20:08:01 -03:00
3690f63405 instalar sqlite en scraper para debuggear 2023-12-29 20:05:56 -03:00
b3201e7ed7 mejorar border input search 2023-12-29 19:57:48 -03:00
d5eb1fee1d no printear debug search 2023-12-29 19:57:48 -03:00
bc95d9c255 migrar en sitio 2023-12-29 19:57:48 -03:00
a55b5b9104 separar migraciones a varios archivos
aparentemente las migraciones de drizzle-kit con bun no soportan varios statements
2023-12-29 19:57:48 -03:00
0f453567ca WIP: barra de busqueda
lamentablemente drizzle-kit no está aplicando los triggers en la migración; si no, se podría subir
2023-12-29 19:57:48 -03:00
45 changed files with 1159 additions and 201 deletions

BIN
bun.lockb

Binary file not shown.

View file

@ -0,0 +1,44 @@
import pMap from "p-map";
import { saveUrls } from "db-datos/urlHelpers.js";
// Runs the sitemap scrape as soon as this module is evaluated (top-level await),
// so merely importing the file triggers network requests and database writes.
// NOTE(review): scrapCarrefourProducts() performs the same scrape again when it is
// called — confirm whether this import-time run is intentional or a debugging leftover.
await scrapBySitemap();
/**
 * Public entry point of the Carrefour link scraper.
 *
 * Delegates to the sitemap-based scrape and settles once that scrape has settled.
 */
export async function scrapCarrefourProducts(): Promise<void> {
  return scrapBySitemap();
}
/**
 * Collects product URLs from Carrefour's product sitemaps and stores them with `saveUrls`.
 *
 * Every sitemap is downloaded, the text of each `<loc>` element is extracted and
 * de-duplicated per sitemap, and the resulting list is saved. At most 3 sitemaps
 * are processed concurrently.
 *
 * @throws Error if a sitemap answers with a non-2xx status, so that an error page
 *   is never parsed as if it were a sitemap.
 */
async function scrapBySitemap() {
  // Taken from https://www.carrefour.com.ar/sitemap.xml
  const sitemaps = [
    "https://www.carrefour.com.ar/sitemap/product-0.xml",
    "https://www.carrefour.com.ar/sitemap/product-1.xml",
    "https://www.carrefour.com.ar/sitemap/product-2.xml",
    "https://www.carrefour.com.ar/sitemap/product-3.xml",
    "https://www.carrefour.com.ar/sitemap/product-4.xml",
    "https://www.carrefour.com.ar/sitemap/product-5.xml",
    "https://www.carrefour.com.ar/sitemap/product-6.xml",
    "https://www.carrefour.com.ar/sitemap/product-7.xml",
    "https://www.carrefour.com.ar/sitemap/product-8.xml",
    "https://www.carrefour.com.ar/sitemap/product-9.xml",
  ];

  await pMap(
    sitemaps,
    async (sitemapUrl) => {
      const res = await fetch(sitemapUrl);
      if (!res.ok) {
        throw new Error(
          `failed to fetch sitemap ${sitemapUrl}: ${res.status} ${res.statusText}`
        );
      }
      const xml = await res.text();

      const urls = new Set<string>();
      // A single text node may be delivered to the handler in several chunks, so
      // buffer the pieces and only record the URL once the last chunk has arrived.
      let pending = "";
      // NOTE(review): the transformed Response is discarded; this relies on the
      // handlers having run by the time transform() returns for an in-memory
      // body — confirm for the runtime in use.
      new HTMLRewriter()
        .on("loc", {
          text(chunk) {
            pending += chunk.text;
            if (!chunk.lastInTextNode) return;
            const txt = pending.trim();
            pending = "";
            if (txt) urls.add(txt);
          },
        })
        .transform(new Response(xml));

      saveUrls(Array.from(urls));
    },
    { concurrency: 3 }
  );
}

View file

@ -0,0 +1,17 @@
{
"name": "carrefour-link-scraper",
"type": "module",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"linkedom": "^0.16.5",
"p-map": "^7.0.1"
}
}

View file

@ -1,23 +1,24 @@
import { getHtml } from "../scraper/fetch.js"; import { getHtml } from "../scraper/fetch.js";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import PQueue from "p-queue"; import PQueue from "p-queue";
import { saveUrls } from "db-datos/urlHelpers.js";
// let fetched = new Set<string>(); export async function scrapCotoProducts() {
{
const initial = const initial =
"https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&No=2200&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29&Nrpp=200"; "https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&No=2200&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29&Nrpp=200";
const queue = new PQueue({ concurrency: 2 }); const queue = new PQueue({ concurrency: 4 });
const pageSize = 300; // hasta 1000 const pageSize = 300; // hasta 1000
const links = Array.from({ length: Math.ceil(29000 / 300) }, (x, i) => i).map( const links = Array.from(
(i) => { { length: Math.ceil(29000 / pageSize) },
const url = new URL(initial); (x, i) => i
url.searchParams.set("No", `${i * pageSize}`); ).map((i) => {
url.searchParams.set("Nrpp", `${pageSize}`); const url = new URL(initial);
return url.toString(); url.searchParams.set("No", `${i * pageSize}`);
} url.searchParams.set("Nrpp", `${pageSize}`);
); return url.toString();
});
const promises = links.map((l) => queue.add(getPage(l))); const promises = links.map((l) => queue.add(getPage(l)));
await Promise.all(promises); await Promise.all(promises);
@ -38,22 +39,6 @@ function getPage(url: string) {
document.querySelectorAll<HTMLAnchorElement>(".product_info_container a"), document.querySelectorAll<HTMLAnchorElement>(".product_info_container a"),
(a) => new URL(a.href, url).toString() (a) => new URL(a.href, url).toString()
); );
hrefs.forEach((h) => process.stdout.write(h + "\n")); saveUrls(hrefs);
// const nextLinks = Array.from(
// document.querySelectorAll<HTMLAnchorElement>(
// "#atg_store_pagination a[href]"
// ),
// (a) => new URL(a.href, url).toString()
// );
// await Promise.all(
// nextLinks
// .filter((l) => !fetched.has(l))
// .map((l) => {
// fetched.add(l);
// return queue.add(getPage(l));
// })
// );
}; };
} }

View file

@ -12,8 +12,6 @@
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"linkedom": "^0.16.5", "linkedom": "^0.16.5",
"p-queue": "^8.0.1", "p-queue": "^8.0.1"
"tsx": "^4.7.0",
"undici": "^6.2.0"
} }
} }

10
db-datos/db.ts Normal file
View file

@ -0,0 +1,10 @@
import { Database } from "bun:sqlite";
import { drizzle } from "drizzle-orm/bun-sqlite";
import { DB_PATH } from "./drizzle.config.js";
import { migrateDb } from "./migrate.js";
import * as schema from "./schema.js";
// Run the migrations before the connection exported below is opened.
// This happens as a side effect of importing the module.
migrateDb();
// Raw bun:sqlite handle for the database file at DB_PATH.
export const sqlite = new Database(DB_PATH);
// Drizzle wrapper around `sqlite`, configured with the tables exported by ./schema.js.
export const db = drizzle(sqlite, { schema });

View file

@ -0,0 +1,3 @@
-- Custom SQL migration file, put your code below! --
-- Full-text search index over the ean, url and name columns of `precios`.
-- This is an external-content FTS5 table: the text itself stays in `precios`
-- (content=precios) and index rows are matched to it through rowid = precios.id,
-- so FTS5 does not keep the index in sync with `precios` on its own.
create virtual table precios_fts using fts5(ean, url, name, content=precios, content_rowid=id);

View file

@ -0,0 +1,2 @@
-- Custom SQL migration file, put your code below! --
-- Backfill: add every row that already exists in `precios` to the precios_fts index,
-- using the row's id as the FTS rowid.
insert into precios_fts(rowid,ean,url,name) select id,ean,url,name from precios;

View file

@ -0,0 +1,7 @@
-- Custom SQL migration file, put your code below! --
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
-- Trigger that keeps the FTS index up to date on INSERT:
-- every new `precios` row is added to precios_fts under the same rowid.
CREATE TRIGGER precios_fts_ai AFTER INSERT ON precios BEGIN
INSERT INTO precios_fts(rowid, ean, url, name) VALUES (new.id, new.ean, new.url, new.name);
END;

View file

@ -0,0 +1,6 @@
-- Custom SQL migration file, put your code below! --
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
-- Trigger that keeps the FTS index up to date on DELETE.
-- Uses the FTS5 'delete' command (first column named after the table) and passes
-- the old column values so the matching index entries can be removed.
CREATE TRIGGER precios_fts_ad AFTER DELETE ON precios BEGIN
INSERT INTO precios_fts(precios_fts, rowid, ean, url, name) VALUES('delete', old.id, old.ean, old.url, old.name);
END;

View file

@ -0,0 +1,8 @@
-- Custom SQL migration file, put your code below! --
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
-- Trigger that keeps the FTS index up to date on UPDATE:
-- first remove the index entries for the old values, then index the new values.
CREATE TRIGGER precios_fts_au AFTER UPDATE ON precios BEGIN
INSERT INTO precios_fts(precios_fts, rowid, ean, url, name) VALUES('delete', old.id, old.ean, old.url, old.name);
INSERT INTO precios_fts(rowid, ean, url, name) VALUES (new.id, new.ean, new.url, new.name);
END;

View file

@ -0,0 +1,8 @@
-- Table of product URLs; `first_seen` and `last_seen` are stored as integers.
CREATE TABLE `producto_urls` (
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
`url` text NOT NULL,
`first_seen` integer NOT NULL,
`last_seen` integer NOT NULL
);
--> statement-breakpoint
-- Each URL may appear only once.
CREATE UNIQUE INDEX `producto_urls_url_unique` ON `producto_urls` (`url`);

View file

@ -0,0 +1,101 @@
{
"id": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
"prevId": "e1217fdb-6f54-44c5-a04b-c5aebf202102",
"version": "5",
"dialect": "sqlite",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -0,0 +1,101 @@
{
"id": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
"prevId": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
"version": "5",
"dialect": "sqlite",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -0,0 +1,101 @@
{
"id": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
"prevId": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
"version": "5",
"dialect": "sqlite",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -0,0 +1,101 @@
{
"id": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
"prevId": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
"version": "5",
"dialect": "sqlite",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -0,0 +1,101 @@
{
"id": "082630a9-3744-4e33-bde5-06045ca57d36",
"prevId": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
"version": "5",
"dialect": "sqlite",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View file

@ -0,0 +1,146 @@
{
"version": "5",
"dialect": "sqlite",
"id": "2e398920-ffaf-4d55-ae13-d906cb9e0efa",
"prevId": "082630a9-3744-4e33-bde5-06045ca57d36",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"producto_urls": {
"name": "producto_urls",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"first_seen": {
"name": "first_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"last_seen": {
"name": "last_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"producto_urls_url_unique": {
"name": "producto_urls_url_unique",
"columns": [
"url"
],
"isUnique": true
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
}

View file

@ -29,6 +29,48 @@
"when": 1703521964385, "when": 1703521964385,
"tag": "0003_abandoned_landau", "tag": "0003_abandoned_landau",
"breakpoints": true "breakpoints": true
},
{
"idx": 4,
"version": "5",
"when": 1703726748364,
"tag": "0004_left_wolfsbane",
"breakpoints": true
},
{
"idx": 5,
"version": "5",
"when": 1703807455551,
"tag": "0005_lucky_epoch",
"breakpoints": true
},
{
"idx": 6,
"version": "5",
"when": 1703807457204,
"tag": "0006_jazzy_madripoor",
"breakpoints": true
},
{
"idx": 7,
"version": "5",
"when": 1703807458666,
"tag": "0007_bright_silvermane",
"breakpoints": true
},
{
"idx": 8,
"version": "5",
"when": 1703807460152,
"tag": "0008_funny_nighthawk",
"breakpoints": true
},
{
"idx": 9,
"version": "5",
"when": 1703895109501,
"tag": "0009_breezy_forge",
"breakpoints": true
} }
] ]
} }

View file

@ -1,15 +1,16 @@
import Database from "bun:sqlite"; import Database from "bun:sqlite";
import { join } from "node:path"; import { join, dirname } from "node:path";
import { drizzle } from "drizzle-orm/bun-sqlite"; import { drizzle } from "drizzle-orm/bun-sqlite";
import { migrate } from "drizzle-orm/bun-sqlite/migrator"; import { migrate } from "drizzle-orm/bun-sqlite/migrator";
import * as schema from "./schema.js"; import * as schema from "./schema.js";
import { DB_PATH } from "./drizzle.config.js"; import { DB_PATH } from "./drizzle.config.js";
const url = new URL(import.meta.url);
export function migrateDb() { export function migrateDb() {
const sqlite = new Database(DB_PATH); const sqlite = new Database(DB_PATH);
const db = drizzle(sqlite, { schema }); const db = drizzle(sqlite, { schema });
migrate(db, { migrationsFolder: join(import.meta.dir, "drizzle") }); migrate(db, { migrationsFolder: join(dirname(url.pathname), "drizzle") });
sqlite.run(` sqlite.run(`
pragma journal_mode = WAL; pragma journal_mode = WAL;
PRAGMA synchronous = NORMAL; PRAGMA synchronous = NORMAL;

View file

@ -11,7 +11,7 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"drizzle-orm": "^0.29.1" "drizzle-orm": "=0.29.1"
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "^1.0.0", "@types/bun": "^1.0.0",

View file

@ -22,3 +22,12 @@ export const precios = sqliteTable(
); );
export type Precio = typeof precios.$inferSelect; export type Precio = typeof precios.$inferSelect;
/**
 * Drizzle definition of the `producto_urls` table: an auto-incremented numeric id,
 * a unique, non-null `url`, and two non-null integer columns read and written as
 * timestamps (`first_seen`, `last_seen`).
 */
export const productoUrls = sqliteTable("producto_urls", {
id: integer("id", { mode: "number" }).primaryKey({ autoIncrement: true }),
// Unique, so the same URL can only be stored once.
url: text("url").unique().notNull(),
firstSeen: integer("first_seen", { mode: "timestamp" }).notNull(),
lastSeen: integer("last_seen", { mode: "timestamp" }).notNull(),
});
/** Row shape returned when selecting from `productoUrls`. */
export type ProductUrl = typeof productoUrls.$inferSelect;

25
db-datos/urlHelpers.ts Normal file
View file

@ -0,0 +1,25 @@
import { sql } from "drizzle-orm";
import { db } from "./db.js";
import { productoUrls } from "./schema.js";
/**
 * Upserts every URL of `urls` into `producto_urls` within a single transaction.
 *
 * A URL that is not stored yet is inserted with `firstSeen` and `lastSeen` set to
 * the current time; for a URL that already exists only `lastSeen` is refreshed.
 * All rows written by one call share the same timestamp.
 *
 * @param urls - URLs to record.
 */
export function saveUrls(urls: string[]) {
  db.transaction((tx) => {
    // One timestamp for the whole batch.
    const seenAt = new Date();

    // Prepared once and re-run per URL; the URL is bound through a placeholder.
    const upsertUrl = tx
      .insert(productoUrls)
      .values({
        url: sql.placeholder("url"),
        firstSeen: seenAt,
        lastSeen: seenAt,
      })
      .onConflictDoUpdate({
        target: productoUrls.url,
        set: { lastSeen: seenAt },
      })
      .prepare();

    for (const url of urls) {
      upsertUrl.run({ url });
    }
  });
}

View file

@ -1,94 +1,110 @@
import pMap from "p-map"; import pMap from "p-map";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import { getHtml } from "../scraper/fetch.js"; import { getHtml } from "../scraper/fetch.js";
(async () => { import { saveUrls } from "db-datos/urlHelpers.js";
const categorias = [
"https://diaonline.supermercadosdia.com.ar/almacen", const categorias = [
"https://diaonline.supermercadosdia.com.ar/almacen/conservas", "https://diaonline.supermercadosdia.com.ar/almacen",
"https://diaonline.supermercadosdia.com.ar/almacen/aceites-y-aderezos", "https://diaonline.supermercadosdia.com.ar/almacen/conservas",
"https://diaonline.supermercadosdia.com.ar/almacen/pastas-secas", "https://diaonline.supermercadosdia.com.ar/almacen/aceites-y-aderezos",
"https://diaonline.supermercadosdia.com.ar/almacen/arroz-y-legumbres", "https://diaonline.supermercadosdia.com.ar/almacen/pastas-secas",
"https://diaonline.supermercadosdia.com.ar/almacen/panaderia", "https://diaonline.supermercadosdia.com.ar/almacen/arroz-y-legumbres",
"https://diaonline.supermercadosdia.com.ar/almacen/golosinas-y-alfajores", "https://diaonline.supermercadosdia.com.ar/almacen/panaderia",
"https://diaonline.supermercadosdia.com.ar/almacen/reposteria", "https://diaonline.supermercadosdia.com.ar/almacen/golosinas-y-alfajores",
"https://diaonline.supermercadosdia.com.ar/almacen/comidas-listas", "https://diaonline.supermercadosdia.com.ar/almacen/reposteria",
"https://diaonline.supermercadosdia.com.ar/almacen/harinas", "https://diaonline.supermercadosdia.com.ar/almacen/comidas-listas",
"https://diaonline.supermercadosdia.com.ar/almacen/picadas", "https://diaonline.supermercadosdia.com.ar/almacen/harinas",
"https://diaonline.supermercadosdia.com.ar/almacen/panaderia/pan-rallado-y-rebozadores", "https://diaonline.supermercadosdia.com.ar/almacen/picadas",
"https://diaonline.supermercadosdia.com.ar/desayuno", "https://diaonline.supermercadosdia.com.ar/almacen/panaderia/pan-rallado-y-rebozadores",
"https://diaonline.supermercadosdia.com.ar/desayuno/galletitas-y-cereales", "https://diaonline.supermercadosdia.com.ar/desayuno",
"https://diaonline.supermercadosdia.com.ar/desayuno/infusiones-y-endulzantes", "https://diaonline.supermercadosdia.com.ar/desayuno/galletitas-y-cereales",
"https://diaonline.supermercadosdia.com.ar/desayuno/para-untar", "https://diaonline.supermercadosdia.com.ar/desayuno/infusiones-y-endulzantes",
"https://diaonline.supermercadosdia.com.ar/frescos", "https://diaonline.supermercadosdia.com.ar/desayuno/para-untar",
"https://diaonline.supermercadosdia.com.ar/frescos/leches", "https://diaonline.supermercadosdia.com.ar/frescos",
"https://diaonline.supermercadosdia.com.ar/frescos/fiambreria", "https://diaonline.supermercadosdia.com.ar/frescos/leches",
"https://diaonline.supermercadosdia.com.ar/frescos/lacteos", "https://diaonline.supermercadosdia.com.ar/frescos/fiambreria",
"https://diaonline.supermercadosdia.com.ar/frescos/carniceria", "https://diaonline.supermercadosdia.com.ar/frescos/lacteos",
"https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras", "https://diaonline.supermercadosdia.com.ar/frescos/carniceria",
"https://diaonline.supermercadosdia.com.ar/frescos/pastas-frescas", "https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras",
"https://diaonline.supermercadosdia.com.ar/frescos/listos-para-disfrutar", "https://diaonline.supermercadosdia.com.ar/frescos/pastas-frescas",
"https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/frutas", "https://diaonline.supermercadosdia.com.ar/frescos/listos-para-disfrutar",
"https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/verduras", "https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/frutas",
"https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/huevos", "https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/verduras",
"https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/frutos-secos", "https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/huevos",
"https://diaonline.supermercadosdia.com.ar/bebidas", "https://diaonline.supermercadosdia.com.ar/frescos/frutas-y-verduras/frutos-secos",
"https://diaonline.supermercadosdia.com.ar/bebidas/gaseosas", "https://diaonline.supermercadosdia.com.ar/bebidas",
"https://diaonline.supermercadosdia.com.ar/bebidas/cervezas", "https://diaonline.supermercadosdia.com.ar/bebidas/gaseosas",
"https://diaonline.supermercadosdia.com.ar/bebidas/aguas", "https://diaonline.supermercadosdia.com.ar/bebidas/cervezas",
"https://diaonline.supermercadosdia.com.ar/bebidas/bodega", "https://diaonline.supermercadosdia.com.ar/bebidas/aguas",
"https://diaonline.supermercadosdia.com.ar/bebidas/jugos-e-isot%C3%B3nicas", "https://diaonline.supermercadosdia.com.ar/bebidas/bodega",
"https://diaonline.supermercadosdia.com.ar/bebidas/aperitivos", "https://diaonline.supermercadosdia.com.ar/bebidas/jugos-e-isot%C3%B3nicas",
"https://diaonline.supermercadosdia.com.ar/bebidas/bebidas-blancas-y-licores", "https://diaonline.supermercadosdia.com.ar/bebidas/aperitivos",
"https://diaonline.supermercadosdia.com.ar/congelados", "https://diaonline.supermercadosdia.com.ar/bebidas/bebidas-blancas-y-licores",
"https://diaonline.supermercadosdia.com.ar/congelados/hamburguesas-y-medallones", "https://diaonline.supermercadosdia.com.ar/congelados",
"https://diaonline.supermercadosdia.com.ar/congelados/rebozados", "https://diaonline.supermercadosdia.com.ar/congelados/hamburguesas-y-medallones",
"https://diaonline.supermercadosdia.com.ar/congelados/vegetales-congelados", "https://diaonline.supermercadosdia.com.ar/congelados/rebozados",
"https://diaonline.supermercadosdia.com.ar/congelados/postres-congelados", "https://diaonline.supermercadosdia.com.ar/congelados/vegetales-congelados",
"https://diaonline.supermercadosdia.com.ar/congelados/pescaderia", "https://diaonline.supermercadosdia.com.ar/congelados/postres-congelados",
"https://diaonline.supermercadosdia.com.ar/congelados/papas-congeladas", "https://diaonline.supermercadosdia.com.ar/congelados/pescaderia",
"https://diaonline.supermercadosdia.com.ar/congelados/comidas-congeladas", "https://diaonline.supermercadosdia.com.ar/congelados/papas-congeladas",
"https://diaonline.supermercadosdia.com.ar/congelados/hielo", "https://diaonline.supermercadosdia.com.ar/congelados/comidas-congeladas",
"https://diaonline.supermercadosdia.com.ar/limpieza", "https://diaonline.supermercadosdia.com.ar/congelados/hielo",
"https://diaonline.supermercadosdia.com.ar/limpieza/cuidado-de-la-ropa", "https://diaonline.supermercadosdia.com.ar/limpieza",
"https://diaonline.supermercadosdia.com.ar/limpieza/papeleria", "https://diaonline.supermercadosdia.com.ar/limpieza/cuidado-de-la-ropa",
"https://diaonline.supermercadosdia.com.ar/limpieza/limpiadores", "https://diaonline.supermercadosdia.com.ar/limpieza/papeleria",
"https://diaonline.supermercadosdia.com.ar/limpieza/limpieza-de-cocina", "https://diaonline.supermercadosdia.com.ar/limpieza/limpiadores",
"https://diaonline.supermercadosdia.com.ar/limpieza/accesorios-de-limpieza", "https://diaonline.supermercadosdia.com.ar/limpieza/limpieza-de-cocina",
"https://diaonline.supermercadosdia.com.ar/limpieza/desodorantes-de-ambiente", "https://diaonline.supermercadosdia.com.ar/limpieza/accesorios-de-limpieza",
"https://diaonline.supermercadosdia.com.ar/limpieza/insecticidas", "https://diaonline.supermercadosdia.com.ar/limpieza/desodorantes-de-ambiente",
"https://diaonline.supermercadosdia.com.ar/limpieza/fosforos-y-velas", "https://diaonline.supermercadosdia.com.ar/limpieza/insecticidas",
"https://diaonline.supermercadosdia.com.ar/limpieza/bolsas", "https://diaonline.supermercadosdia.com.ar/limpieza/fosforos-y-velas",
"https://diaonline.supermercadosdia.com.ar/4160?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/limpieza/bolsas",
"https://diaonline.supermercadosdia.com.ar/4136?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/4160?map=productClusterIds&order=OrderByBestDiscountDESC",
"https://diaonline.supermercadosdia.com.ar/4143?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/4136?map=productClusterIds&order=OrderByBestDiscountDESC",
"https://diaonline.supermercadosdia.com.ar/4189?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/4143?map=productClusterIds&order=OrderByBestDiscountDESC",
"https://diaonline.supermercadosdia.com.ar/4086?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/4189?map=productClusterIds&order=OrderByBestDiscountDESC",
"https://diaonline.supermercadosdia.com.ar/2089?map=productClusterIds&order=OrderByBestDiscountDESC", "https://diaonline.supermercadosdia.com.ar/4086?map=productClusterIds&order=OrderByBestDiscountDESC",
"https://diaonline.supermercadosdia.com.ar/2089?map=productClusterIds&order=OrderByBestDiscountDESC",
];
export async function scrapDiaProducts() {
await Promise.all([scrapBySite(), scrapBySitemap()]);
}
async function scrapBySitemap() {
// de https://diaonline.supermercadosdia.com.ar/sitemap.xml
const sitemaps = [
"https://diaonline.supermercadosdia.com.ar/sitemap/product-1.xml",
"https://diaonline.supermercadosdia.com.ar/sitemap/product-2.xml",
"https://diaonline.supermercadosdia.com.ar/sitemap/product-3.xml",
"https://diaonline.supermercadosdia.com.ar/sitemap/product-4.xml",
"https://diaonline.supermercadosdia.com.ar/sitemap/product-5.xml",
]; ];
const links = categorias.flatMap( await pMap(sitemaps, async (sitemapUrl) => {
(link) => const res = await fetch(sitemapUrl);
Array.from({ length: 51 }, (x, i) => i).map((i) => { const xml = await res.text();
const url = new URL(link); let urls = new Set<string>();
url.searchParams.set("page", `${i}`); new HTMLRewriter()
return url.toString(); .on("loc", {
text(element) {
const txt = element.text.trim();
if (!txt) return;
urls.add(txt);
},
}) })
.transform(new Response(xml));
saveUrls(Array.from(urls));
});
}
// el order solo carga con el frontend :( async function scrapBySite() {
// .flatMap((link) => const links = categorias.flatMap((link) =>
// [ Array.from({ length: 51 }, (x, i) => i).map((i) => {
// "OrderByNameASC", const url = new URL(link);
// "OrderByNameDESC", url.searchParams.set("page", `${i}`);
// "OrderByTopSaleDESC", return url.toString();
// "OrderByPriceDESC", })
// "OrderByPriceASC",
// "",
// ].map((order) => {
// const url = new URL(link);
// url.searchParams.set("order", order);
// return url.toString();
// })
// )
); );
await pMap( await pMap(
@ -103,8 +119,8 @@ import { getHtml } from "../scraper/fetch.js";
), ),
(a) => new URL(a.href, url).toString() (a) => new URL(a.href, url).toString()
); );
hrefs.forEach((h) => process.stdout.write(h + "\n")); saveUrls(hrefs);
}, },
{ concurrency: 32 } { concurrency: 32 }
); );
})(); }

View file

@ -12,8 +12,6 @@
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"linkedom": "^0.16.5", "linkedom": "^0.16.5",
"p-map": "^7.0.0", "p-map": "^7.0.0"
"tsx": "^4.7.0",
"undici": "^6.2.0"
} }
} }

View file

@ -4,6 +4,7 @@
"workspaces": [ "workspaces": [
"dia-link-scraper", "dia-link-scraper",
"coto-link-scraper", "coto-link-scraper",
"carrefour-link-scraper",
"scraper", "scraper",
"sitio", "sitio",
"db-datos" "db-datos"

View file

@ -1,14 +1,20 @@
import { mkdtemp, access } from "node:fs/promises"; import { mkdtemp, access, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os"; import { tmpdir } from "node:os";
import { join, resolve } from "node:path"; import { join, resolve } from "node:path";
import { spawn } from "node:child_process"; import { spawn } from "node:child_process";
import { Supermercado } from "db-datos/supermercado.js"; import { Supermercado, hosts } from "db-datos/supermercado.js";
import PQueue from "p-queue"; import PQueue from "p-queue";
import { format, formatDuration, intervalToDuration } from "date-fns"; import { format, formatDuration, intervalToDuration } from "date-fns";
import { parseWarc } from "./scrap.js"; import { parseWarc } from "./scrap.js";
import { S3Client } from "@aws-sdk/client-s3"; import { S3Client } from "@aws-sdk/client-s3";
import { Upload } from "@aws-sdk/lib-storage"; import { Upload } from "@aws-sdk/lib-storage";
import { BunFile } from "bun"; import { BunFile } from "bun";
import { db } from "db-datos/db.js";
import { like } from "drizzle-orm";
import { productoUrls } from "db-datos/schema.js";
import { scrapDiaProducts } from "../dia-link-scraper/index.js";
import { scrapCotoProducts } from "../coto-link-scraper/index.js";
import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js";
const supermercados: Supermercado[] = [ const supermercados: Supermercado[] = [
Supermercado.Carrefour, Supermercado.Carrefour,
@ -71,11 +77,40 @@ class Auto {
} }
async downloadList(supermercado: Supermercado) { async downloadList(supermercado: Supermercado) {
const listPath = resolve(
join(process.env.LISTS_DIR ?? "../data", `${supermercado}.txt`)
);
const date = new Date();
const ctxPath = await mkdtemp(join(tmpdir(), "preciazo-scraper-wget-")); const ctxPath = await mkdtemp(join(tmpdir(), "preciazo-scraper-wget-"));
let listPath: string;
{
const t0 = performance.now();
switch (supermercado) {
case "Dia":
await scrapDiaProducts();
break;
case "Coto":
await scrapCotoProducts();
break;
case "Carrefour":
await scrapCarrefourProducts();
break;
}
this.inform(
`[scrapUrls[${supermercado}]] Tardó ${formatMs(performance.now() - t0)}`
);
}
listPath = join(ctxPath, `lista-${supermercado}.txt`);
const host = Object.entries(hosts).find(
([host, supe]) => supe === supermercado
)![0];
const results = await db.query.productoUrls
.findMany({
where: like(productoUrls.url, `%${host}%`),
})
.execute();
const urls = results.map((r) => r.url);
await writeFile(listPath, urls.join("\n") + "\n");
const date = new Date();
const zstdWarcName = `${supermercado}-${format( const zstdWarcName = `${supermercado}-${format(
date, date,
"yyyy-MM-dd-HH:mm" "yyyy-MM-dd-HH:mm"
@ -98,7 +133,7 @@ class Auto {
const t0 = performance.now(); const t0 = performance.now();
await subproc.exited; await subproc.exited;
this.inform( this.inform(
`wget para ${zstdWarcName} tardó ${formatMs(performance.now() - t0)}` `[wget] ${zstdWarcName} tardó ${formatMs(performance.now() - t0)}`
); );
const gzippedWarcPath = join(ctxPath, "temp.warc.gz"); const gzippedWarcPath = join(ctxPath, "temp.warc.gz");
@ -187,7 +222,6 @@ class Auto {
stdio: ["pipe", null, null], stdio: ["pipe", null, null],
} }
); );
// @ts-expect-error a los types de bun no le gusta????
decompressor.stdout.pipe(compressor.stdin); decompressor.stdout.pipe(compressor.stdin);
compressor.on("close", (code) => { compressor.on("close", (code) => {
if (code !== 0) { if (code !== 0) {

View file

@ -1,32 +1,6 @@
import { request } from "undici";
import { createBrotliDecompress, createUnzip } from "node:zlib";
import { pipeline } from "node:stream/promises";
export async function getHtml(url: string) { export async function getHtml(url: string) {
const res = await request(url, { const res = await fetch(url);
headers: { return readableToBuffer(res.body!);
"Accept-Encoding": "gzip, deflate, br",
},
throwOnError: true,
bodyTimeout: 10 * 60 * 1000,
});
let output: Buffer;
switch (res.headers["content-encoding"]) {
case "gzip":
case "deflate":
output = await pipeline(res.body, createUnzip(), readableToBuffer);
break;
case "br":
output = await pipeline(
res.body,
createBrotliDecompress(),
readableToBuffer
);
break;
default:
output = await readableToBuffer(res.body);
}
return output;
} }
async function readableToBuffer(source: AsyncIterable<any>) { async function readableToBuffer(source: AsyncIterable<any>) {

View file

@ -5,7 +5,8 @@
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"build:container": "podman build -t gitea.nulo.in/nulo/preciazo/scraper -f ./Containerfile .." "build:container": "podman build -t gitea.nulo.in/nulo/preciazo/scraper -f ./Containerfile ..",
"push:container": "bun build:container && podman push gitea.nulo.in/nulo/preciazo/scraper"
}, },
"keywords": [], "keywords": [],
"author": "", "author": "",
@ -15,11 +16,10 @@
"@aws-sdk/lib-storage": "^3.478.0", "@aws-sdk/lib-storage": "^3.478.0",
"date-fns": "^3.0.6", "date-fns": "^3.0.6",
"db-datos": "workspace:^", "db-datos": "workspace:^",
"drizzle-orm": "^0.29.1", "drizzle-orm": "=0.29.1",
"linkedom": "^0.16.5", "linkedom": "^0.16.5",
"nanoid": "^5.0.4", "nanoid": "^5.0.4",
"p-queue": "^8.0.1", "p-queue": "^8.0.1",
"undici": "^6.2.0",
"warcio": "^2.2.1", "warcio": "^2.2.1",
"zod": "^3.22.4" "zod": "^3.22.4"
}, },

View file

@ -34,10 +34,11 @@ export function getCotoProduct(html: string | Buffer): Precioish {
const ean = getEanFromText(dom); const ean = getEanFromText(dom);
const precioCentavos = getPriceFromText(dom); const precioCentavos = getPriceFromText(dom);
const name = dom.document.querySelector("h1.product_page")?.textContent; const name = dom.document
const imageUrl = dom.document.querySelector<HTMLImageElement>( .querySelector("h1.product_page")
".productImageZoom img" ?.textContent?.trim();
)?.src; const imageUrl =
dom.document.querySelector<HTMLImageElement>(".zoom img")?.src;
return { name, imageUrl, ean, precioCentavos }; return { name, imageUrl, ean, precioCentavos };
} }

View file

@ -1,5 +1,3 @@
import { Database } from "bun:sqlite";
import { drizzle } from "drizzle-orm/bun-sqlite";
import * as schema from "db-datos/schema.js"; import * as schema from "db-datos/schema.js";
import { WARCParser } from "warcio"; import { WARCParser } from "warcio";
import { writeFile } from "fs/promises"; import { writeFile } from "fs/promises";
@ -9,16 +7,10 @@ import { getDiaProduct } from "./parsers/dia.js";
import { getCotoProduct } from "./parsers/coto.js"; import { getCotoProduct } from "./parsers/coto.js";
import { join } from "path"; import { join } from "path";
import { and, eq, sql } from "drizzle-orm"; import { and, eq, sql } from "drizzle-orm";
import { DB_PATH } from "db-datos/drizzle.config.js"; import { db } from "db-datos/db.js";
import { migrateDb } from "db-datos/migrate.js";
const DEBUG = false; const DEBUG = false;
const PARSER_VERSION = 2; const PARSER_VERSION = 4;
migrateDb();
const sqlite = new Database(DB_PATH);
const db = drizzle(sqlite, { schema });
const getPrevPrecio = db const getPrevPrecio = db
.select({ id: schema.precios.id }) .select({ id: schema.precios.id })

View file

@ -1,7 +1,24 @@
FROM docker.io/oven/bun:1-alpine FROM docker.io/oven/bun:1-alpine as build
COPY build/ . RUN apk add --no-cache nodejs
RUN bun i WORKDIR /usr/src/app
EXPOSE 3000 COPY . .
WORKDIR /usr/src/app/sitio
RUN bun install && \
bun run build
# FROM docker.io/oven/bun:1-alpine as deps
# WORKDIR /usr/src/app/sitio
# RUN bun init && bun install "better-sqlite3"@"^9.2.2" "chart.js"@"^4.4.1" "chartjs-adapter-dayjs-4"@"^1.0.4" "dayjs"@"^1.11.10" "drizzle-orm"@"^0.29.1"
# COPY --from=build /usr/src/app/db-datos node_modules/db-datos
FROM docker.io/alpine:3.19
RUN apk add --no-cache tini nodejs npm jq
WORKDIR /app
COPY --from=build /usr/src/app/sitio/package.json package.real.json
RUN sh -c 'echo {\"name\":\"sitio\",\"type\":\"module\",\"dependencies\":$(jq .dependencies < package.real.json)} > package.json' && npm install
COPY --from=build /usr/src/app/db-datos node_modules/db-datos
COPY --from=build /usr/src/app/sitio/build .
# https://github.com/gornostay25/svelte-adapter-bun/issues/39 # https://github.com/gornostay25/svelte-adapter-bun/issues/39
ENV PROTOCOL_HEADER=x-forwarded-proto ENV PROTOCOL_HEADER=x-forwarded-proto
@ -9,5 +26,6 @@ ENV HOST_HEADER=x-forwarded-host
VOLUME /db VOLUME /db
ENV DB_PATH=/db/db.db ENV DB_PATH=/db/db.db
EXPOSE 3000
CMD ["bun", "run", "start"] CMD ["tini", "node", "."]

View file

@ -5,7 +5,7 @@
"scripts": { "scripts": {
"dev": "vite dev", "dev": "vite dev",
"build": "vite build", "build": "vite build",
"build:container": "bun --bun vite build && podman build -t gitea.nulo.in/nulo/preciazo/sitio .", "build:container": "podman build -t gitea.nulo.in/nulo/preciazo/sitio -f ./Containerfile ..",
"push:container": "bun build:container && podman push gitea.nulo.in/nulo/preciazo/sitio", "push:container": "bun build:container && podman push gitea.nulo.in/nulo/preciazo/sitio",
"preview": "vite preview", "preview": "vite preview",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
@ -16,7 +16,6 @@
"devDependencies": { "devDependencies": {
"@sveltejs/kit": "^2.0.0", "@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0", "@sveltejs/vite-plugin-svelte": "^3.0.0",
"@types/bun": "^1.0.0",
"autoprefixer": "^10.4.16", "autoprefixer": "^10.4.16",
"db-datos": "workspace:^", "db-datos": "workspace:^",
"postcss": "^8.4.32", "postcss": "^8.4.32",
@ -25,18 +24,21 @@
"prettier-plugin-svelte": "^3.1.2", "prettier-plugin-svelte": "^3.1.2",
"prettier-plugin-tailwindcss": "^0.5.9", "prettier-plugin-tailwindcss": "^0.5.9",
"svelte": "^4.2.7", "svelte": "^4.2.7",
"svelte-adapter-bun": "^0.5.1",
"svelte-check": "^3.6.0", "svelte-check": "^3.6.0",
"tailwindcss": "^3.3.6", "tailwindcss": "^3.3.6",
"tslib": "^2.4.1", "tslib": "^2.4.1",
"typescript": "^5.0.0", "typescript": "^5.0.0",
"vite": "^5.0.3" "vite": "^5.0.3",
"@sveltejs/adapter-node": "^2.0.2",
"@types/better-sqlite3": "^7.6.8",
"@types/node": "^20.10.6"
}, },
"type": "module", "type": "module",
"dependencies": { "dependencies": {
"better-sqlite3": "^9.2.2",
"chart.js": "^4.4.1", "chart.js": "^4.4.1",
"chartjs-adapter-dayjs-4": "^1.0.4", "chartjs-adapter-dayjs-4": "^1.0.4",
"dayjs": "^1.11.10", "dayjs": "^1.11.10",
"drizzle-orm": "^0.29.1" "drizzle-orm": "=0.29.1"
} }
} }

View file

@ -6,7 +6,10 @@
<meta name="viewport" content="width=device-width, initial-scale=1" /> <meta name="viewport" content="width=device-width, initial-scale=1" />
%sveltekit.head% %sveltekit.head%
</head> </head>
<body data-sveltekit-preload-data="hover"> <body
class="bg-neutral-100 dark:bg-neutral-900 dark:text-neutral-200"
data-sveltekit-preload-data="hover"
>
<div style="display: contents">%sveltekit.body%</div> <div style="display: contents">%sveltekit.body%</div>
</body> </body>
</html> </html>

View file

@ -0,0 +1,8 @@
<!--
@component
Clickable product preview: the product image next to the product name,
wrapped in a link to the product's `/ean/<ean>` page.
-->
<script lang="ts">
// Product to render. `ean` builds the link target, `imageUrl` is the image
// source, and `name` is used both as the visible label and as the image alt text.
export let product: { ean: string; name: string; imageUrl: string };
</script>
<a href={`/ean/${product.ean}`} class="flex">
<img src={product.imageUrl} alt={product.name} class="max-h-48" />
<p class="text-xl">{product.name}</p>
</a>

View file

@ -1,9 +1,10 @@
import Database from "bun:sqlite"; import Database from "better-sqlite3";
import { drizzle } from "drizzle-orm/bun-sqlite"; import { drizzle } from "drizzle-orm/better-sqlite3";
import * as schema from "db-datos/schema.js"; import * as schema from "db-datos/schema.js";
import { env } from "$env/dynamic/private"; import { env } from "$env/dynamic/private";
const sqlite = new Database(env.DB_PATH ?? "../scraper/sqlite.db"); const sqlite = new Database(env.DB_PATH ?? "../scraper/sqlite.db");
const db = drizzle(sqlite, { schema });
export const db = drizzle(sqlite, { schema }); export { db };
export * as schema from "db-datos/schema.js"; export * as schema from "db-datos/schema.js";

View file

@ -0,0 +1,14 @@
import { countDistinct } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { db, schema } from "$lib/server/db";
const { precios } = schema;
/**
 * Server-side loader that reports how many distinct products are known.
 *
 * Counts the distinct `ean` values stored in the `precios` table and exposes
 * the total to the page as `nProductos`.
 */
export const load: PageServerLoad = async () => {
  // The aggregate query yields a single row; pull its `count` out directly.
  const [{ count: nProductos }] = await db
    .select({ count: countDistinct(precios.ean) })
    .from(precios);
  return { nProductos };
};

View file

@ -1,5 +1,43 @@
<script> <script lang="ts">
import "../app.pcss"; import "../app.pcss";
import type { PageData } from "./$types";
export let data: PageData;
</script> </script>
<!-- https://flowbite.com/docs/forms/search-input/ -->
<form method="GET" action="/search">
<div class="flex items-stretch p-4">
<input
type="search"
name="q"
class="block w-full rounded-l-lg border border-gray-300 bg-gray-50 p-2.5 text-sm text-gray-900 focus:border-blue-500 focus:ring-blue-500 dark:border-gray-600 dark:bg-gray-700 dark:text-white dark:placeholder-gray-400 dark:focus:border-blue-500"
placeholder={`Buscar entre ${data.nProductos} productos`}
required
/>
<button
type="submit"
class="block rounded-e-lg border border-blue-700 bg-blue-700 p-2.5 text-sm font-medium text-white hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800"
>
<svg
class="h-4 w-4"
aria-hidden="true"
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 20 20"
>
<path
stroke="currentColor"
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="m19 19-4-4m0-7A7 7 0 1 1 1 8a7 7 0 0 1 14 0Z"
/>
</svg>
<span class="sr-only">Search</span>
</button>
</div>
</form>
<slot /> <slot />

View file

@ -1,4 +1,3 @@
import { error } from "@sveltejs/kit";
import type { PageServerLoad } from "./$types"; import type { PageServerLoad } from "./$types";
import { db, schema } from "$lib/server/db"; import { db, schema } from "$lib/server/db";
const { precios } = schema; const { precios } = schema;
@ -6,7 +5,11 @@ import { sql } from "drizzle-orm";
export const load: PageServerLoad = async ({ params }) => { export const load: PageServerLoad = async ({ params }) => {
const q = db const q = db
.select({ ean: precios.ean, name: precios.name }) .select({
ean: precios.ean,
name: precios.name,
imageUrl: precios.imageUrl,
})
.from(precios) .from(precios)
.groupBy(precios.ean) .groupBy(precios.ean)
.having(sql`max(length(name))`) .having(sql`max(length(name))`)

View file

@ -1,4 +1,5 @@
<script lang="ts"> <script lang="ts">
import ProductPreview from "$lib/ProductPreview.svelte";
import type { PageData } from "./$types"; import type { PageData } from "./$types";
export let data: PageData; export let data: PageData;
@ -30,12 +31,10 @@
<section> <section>
<h2 class="text-lg font-bold">Random</h2> <h2 class="text-lg font-bold">Random</h2>
<ul> <ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
{#each data.precios as product} {#each data.precios as product}
<li> <li>
<a href={`/ean/${product.ean}`}> <ProductPreview {product} />
{product.name}
</a>
</li> </li>
{/each} {/each}
</ul> </ul>

View file

@ -22,7 +22,7 @@
{#if data.meta} {#if data.meta}
<h1 class="text-3xl font-bold">{data.meta.name}</h1> <h1 class="text-3xl font-bold">{data.meta.name}</h1>
<img src={data.meta.imageUrl} class="max-h-48" /> <img src={data.meta.imageUrl} alt={data.meta.name} class="max-h-48" />
<div class="flex gap-2"> <div class="flex gap-2">
{#each urls as [supermercado, url]} {#each urls as [supermercado, url]}
<a <a

View file

@ -42,7 +42,7 @@
} }
</script> </script>
<div class="h-[300px] w-full min-w-[500px]"> <div class="h-[300px] w-full min-w-[500px] bg-neutral-200 dark:invert">
<ChartJs <ChartJs
type="line" type="line"
data={{ datasets }} data={{ datasets }}

View file

@ -0,0 +1,19 @@
import { error } from "@sveltejs/kit";
import { eq, max, sql } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { db, schema } from "$lib/server/db";
const { precios } = schema;
/**
 * Turns free-form user input into a full-text MATCH expression in which every
 * whitespace-separated term is a quoted string (embedded `"` doubled).
 *
 * Quoting keeps characters that are query syntax (`-`, `"`, `*`, `:`, the
 * AND/OR/NOT keywords, ...) from being interpreted as operators, so ordinary
 * searches such as `coca-cola` cannot raise a query syntax error.
 * NOTE(review): assumes `precios_fts` is an FTS5 table — confirm against the migration.
 *
 * @param raw - the raw search text typed by the user
 * @returns the quoted terms joined by spaces, or "" when there are no terms
 */
function toFtsMatchExpression(raw: string): string {
  return raw
    .split(/\s+/)
    .filter((term) => term.length > 0)
    .map((term) => `"${term.replace(/"/g, '""')}"`)
    .join(" ");
}

/**
 * Server-side loader for the search page.
 *
 * Reads the `q` query-string parameter and, when it contains at least one
 * term, runs a full-text search on `precios_fts` joined with `precios`.
 *
 * @returns `query` exactly as received (or null when absent) and `results`:
 *   the matching products, or null when no search was performed.
 */
export const load: PageServerLoad = async ({ url }) => {
  const query = url.searchParams.get("q");
  let results: null | { ean: string; name: string; imageUrl: string }[] = null;
  // A missing or whitespace-only query produces an empty expression, which
  // must not be sent to MATCH; treat it as "no search".
  const matchExpression = toFtsMatchExpression(query ?? "");
  if (matchExpression) {
    results = db.all(
      sql`select p.ean, p.name, p.image_url as imageUrl from precios_fts f
join precios p on p.ean = f.ean
where f.name match ${matchExpression};`,
    );
  }
  return { query, results };
};

View file

@ -0,0 +1,21 @@
<!--
Search results page: shows a heading with the searched text and a responsive
grid of product previews, or a hint when there is no result list to show.
-->
<script lang="ts">
import ProductPreview from "$lib/ProductPreview.svelte";
import type { PageData } from "./$types";
// Page data; this page reads `data.query` and `data.results`.
export let data: PageData;
</script>
<!-- The grid is rendered whenever `data.results` is truthy; an empty array still takes this branch. -->
{#if data.results}
<header class="my-2">
<h1 class="text-2xl font-bold">Resultados para "{data.query}"</h1>
</header>
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
{#each data.results as product}
<li>
<ProductPreview {product} />
</li>
{/each}
</ul>
{:else}
<!-- No result list (`data.results` is falsy): invite the user to search. -->
Probá buscando algo.
{/if}

View file

@ -1,5 +1,5 @@
// import adapter from "@sveltejs/adapter-node"; import adapter from "@sveltejs/adapter-node";
import adapter from "svelte-adapter-bun"; // import adapter from "svelte-adapter-bun";
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
/** @type {import('@sveltejs/kit').Config} */ /** @type {import('@sveltejs/kit').Config} */