mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-29 21:16:19 +00:00
Compare commits
No commits in common. "5dcc901a80fb765995bc795f3b009616dd404768" and "198e51fc97e799510ca725dfc597c9922af07e46" have entirely different histories.
5dcc901a80
...
198e51fc97
45 changed files with 201 additions and 1159 deletions
BIN
bun.lockb
BIN
bun.lockb
Binary file not shown.
|
@ -1,44 +0,0 @@
|
|||
import pMap from "p-map";
|
||||
import { saveUrls } from "db-datos/urlHelpers.js";
|
||||
|
||||
await scrapBySitemap();
|
||||
|
||||
export async function scrapCarrefourProducts() {
|
||||
await scrapBySitemap();
|
||||
}
|
||||
|
||||
async function scrapBySitemap() {
|
||||
// de https://www.carrefour.com.ar/sitemap.xml
|
||||
const sitemaps = [
|
||||
"https://www.carrefour.com.ar/sitemap/product-0.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-1.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-2.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-3.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-4.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-5.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-6.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-7.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-8.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-9.xml",
|
||||
];
|
||||
|
||||
await pMap(
|
||||
sitemaps,
|
||||
async (sitemapUrl) => {
|
||||
const res = await fetch(sitemapUrl);
|
||||
const xml = await res.text();
|
||||
let urls = new Set<string>();
|
||||
new HTMLRewriter()
|
||||
.on("loc", {
|
||||
text(element) {
|
||||
const txt = element.text.trim();
|
||||
if (!txt) return;
|
||||
urls.add(txt);
|
||||
},
|
||||
})
|
||||
.transform(new Response(xml));
|
||||
saveUrls(Array.from(urls));
|
||||
},
|
||||
{ concurrency: 3 }
|
||||
);
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
{
|
||||
"name": "carrefour-link-scraper",
|
||||
"type": "module",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"linkedom": "^0.16.5",
|
||||
"p-map": "^7.0.1"
|
||||
}
|
||||
}
|
|
@ -1,24 +1,23 @@
|
|||
import { getHtml } from "../scraper/fetch.js";
|
||||
import { parseHTML } from "linkedom";
|
||||
import PQueue from "p-queue";
|
||||
import { saveUrls } from "db-datos/urlHelpers.js";
|
||||
|
||||
export async function scrapCotoProducts() {
|
||||
// let fetched = new Set<string>();
|
||||
{
|
||||
const initial =
|
||||
"https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&No=2200&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29&Nrpp=200";
|
||||
|
||||
const queue = new PQueue({ concurrency: 4 });
|
||||
const queue = new PQueue({ concurrency: 2 });
|
||||
|
||||
const pageSize = 300; // hasta 1000
|
||||
const links = Array.from(
|
||||
{ length: Math.ceil(29000 / pageSize) },
|
||||
(x, i) => i
|
||||
).map((i) => {
|
||||
const links = Array.from({ length: Math.ceil(29000 / 300) }, (x, i) => i).map(
|
||||
(i) => {
|
||||
const url = new URL(initial);
|
||||
url.searchParams.set("No", `${i * pageSize}`);
|
||||
url.searchParams.set("Nrpp", `${pageSize}`);
|
||||
return url.toString();
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
const promises = links.map((l) => queue.add(getPage(l)));
|
||||
await Promise.all(promises);
|
||||
|
@ -39,6 +38,22 @@ function getPage(url: string) {
|
|||
document.querySelectorAll<HTMLAnchorElement>(".product_info_container a"),
|
||||
(a) => new URL(a.href, url).toString()
|
||||
);
|
||||
saveUrls(hrefs);
|
||||
hrefs.forEach((h) => process.stdout.write(h + "\n"));
|
||||
|
||||
// const nextLinks = Array.from(
|
||||
// document.querySelectorAll<HTMLAnchorElement>(
|
||||
// "#atg_store_pagination a[href]"
|
||||
// ),
|
||||
// (a) => new URL(a.href, url).toString()
|
||||
// );
|
||||
|
||||
// await Promise.all(
|
||||
// nextLinks
|
||||
// .filter((l) => !fetched.has(l))
|
||||
// .map((l) => {
|
||||
// fetched.add(l);
|
||||
// return queue.add(getPage(l));
|
||||
// })
|
||||
// );
|
||||
};
|
||||
}
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"linkedom": "^0.16.5",
|
||||
"p-queue": "^8.0.1"
|
||||
"p-queue": "^8.0.1",
|
||||
"tsx": "^4.7.0",
|
||||
"undici": "^6.2.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
import { Database } from "bun:sqlite";
|
||||
import { drizzle } from "drizzle-orm/bun-sqlite";
|
||||
import { DB_PATH } from "./drizzle.config.js";
|
||||
import { migrateDb } from "./migrate.js";
|
||||
import * as schema from "./schema.js";
|
||||
|
||||
migrateDb();
|
||||
|
||||
export const sqlite = new Database(DB_PATH);
|
||||
export const db = drizzle(sqlite, { schema });
|
|
@ -1,3 +0,0 @@
|
|||
-- Custom SQL migration file, put you code below! --
|
||||
create virtual table precios_fts using fts5(ean, url, name, content=precios, content_rowid=id);
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
-- Custom SQL migration file, put you code below! --
|
||||
insert into precios_fts(rowid,ean,url,name) select id,ean,url,name from precios;
|
|
@ -1,7 +0,0 @@
|
|||
-- Custom SQL migration file, put you code below! --
|
||||
|
||||
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
|
||||
-- Triggers to keep the FTS index up to date.
|
||||
CREATE TRIGGER precios_fts_ai AFTER INSERT ON precios BEGIN
|
||||
INSERT INTO precios_fts(rowid, ean, url, name) VALUES (new.id, new.ean, new.url, new.name);
|
||||
END;
|
|
@ -1,6 +0,0 @@
|
|||
-- Custom SQL migration file, put you code below! --
|
||||
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
|
||||
-- Triggers to keep the FTS index up to date.
|
||||
CREATE TRIGGER precios_fts_ad AFTER DELETE ON precios BEGIN
|
||||
INSERT INTO precios_fts(precios_fts, rowid, ean, url, name) VALUES('delete', old.id, old.ean, old.url, old.name);
|
||||
END;
|
|
@ -1,8 +0,0 @@
|
|||
-- Custom SQL migration file, put you code below! --
|
||||
|
||||
-- https://sqlite.org/fts5.html#external_content_and_contentless_tables
|
||||
-- Triggers to keep the FTS index up to date.
|
||||
CREATE TRIGGER precios_fts_au AFTER UPDATE ON precios BEGIN
|
||||
INSERT INTO precios_fts(precios_fts, rowid, ean, url, name) VALUES('delete', old.id, old.ean, old.url, old.name);
|
||||
INSERT INTO precios_fts(rowid, ean, url, name) VALUES (new.id, new.ean, new.url, new.name);
|
||||
END;
|
|
@ -1,8 +0,0 @@
|
|||
CREATE TABLE `producto_urls` (
|
||||
`id` integer PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
`url` text NOT NULL,
|
||||
`first_seen` integer NOT NULL,
|
||||
`last_seen` integer NOT NULL
|
||||
);
|
||||
--> statement-breakpoint
|
||||
CREATE UNIQUE INDEX `producto_urls_url_unique` ON `producto_urls` (`url`);
|
|
@ -1,101 +0,0 @@
|
|||
{
|
||||
"id": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
|
||||
"prevId": "e1217fdb-6f54-44c5-a04b-c5aebf202102",
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
{
|
||||
"id": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
|
||||
"prevId": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
{
|
||||
"id": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
|
||||
"prevId": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
{
|
||||
"id": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
|
||||
"prevId": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
{
|
||||
"id": "082630a9-3744-4e33-bde5-06045ca57d36",
|
||||
"prevId": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
|
@ -1,146 +0,0 @@
|
|||
{
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"id": "2e398920-ffaf-4d55-ae13-d906cb9e0efa",
|
||||
"prevId": "082630a9-3744-4e33-bde5-06045ca57d36",
|
||||
"tables": {
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
},
|
||||
"producto_urls": {
|
||||
"name": "producto_urls",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"first_seen": {
|
||||
"name": "first_seen",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_seen": {
|
||||
"name": "last_seen",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"producto_urls_url_unique": {
|
||||
"name": "producto_urls_url_unique",
|
||||
"columns": [
|
||||
"url"
|
||||
],
|
||||
"isUnique": true
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
}
|
||||
}
|
|
@ -29,48 +29,6 @@
|
|||
"when": 1703521964385,
|
||||
"tag": "0003_abandoned_landau",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 4,
|
||||
"version": "5",
|
||||
"when": 1703726748364,
|
||||
"tag": "0004_left_wolfsbane",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 5,
|
||||
"version": "5",
|
||||
"when": 1703807455551,
|
||||
"tag": "0005_lucky_epoch",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 6,
|
||||
"version": "5",
|
||||
"when": 1703807457204,
|
||||
"tag": "0006_jazzy_madripoor",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 7,
|
||||
"version": "5",
|
||||
"when": 1703807458666,
|
||||
"tag": "0007_bright_silvermane",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 8,
|
||||
"version": "5",
|
||||
"when": 1703807460152,
|
||||
"tag": "0008_funny_nighthawk",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 9,
|
||||
"version": "5",
|
||||
"when": 1703895109501,
|
||||
"tag": "0009_breezy_forge",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
|
@ -1,16 +1,15 @@
|
|||
import Database from "bun:sqlite";
|
||||
import { join, dirname } from "node:path";
|
||||
import { join } from "node:path";
|
||||
import { drizzle } from "drizzle-orm/bun-sqlite";
|
||||
import { migrate } from "drizzle-orm/bun-sqlite/migrator";
|
||||
import * as schema from "./schema.js";
|
||||
import { DB_PATH } from "./drizzle.config.js";
|
||||
|
||||
const url = new URL(import.meta.url);
|
||||
export function migrateDb() {
|
||||
const sqlite = new Database(DB_PATH);
|
||||
const db = drizzle(sqlite, { schema });
|
||||
|
||||
migrate(db, { migrationsFolder: join(dirname(url.pathname), "drizzle") });
|
||||
migrate(db, { migrationsFolder: join(import.meta.dir, "drizzle") });
|
||||
sqlite.run(`
|
||||
pragma journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"drizzle-orm": "=0.29.1"
|
||||
"drizzle-orm": "^0.29.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "^1.0.0",
|
||||
|
|
|
@ -22,12 +22,3 @@ export const precios = sqliteTable(
|
|||
);
|
||||
|
||||
export type Precio = typeof precios.$inferSelect;
|
||||
|
||||
export const productoUrls = sqliteTable("producto_urls", {
|
||||
id: integer("id", { mode: "number" }).primaryKey({ autoIncrement: true }),
|
||||
url: text("url").unique().notNull(),
|
||||
firstSeen: integer("first_seen", { mode: "timestamp" }).notNull(),
|
||||
lastSeen: integer("last_seen", { mode: "timestamp" }).notNull(),
|
||||
});
|
||||
|
||||
export type ProductUrl = typeof productoUrls.$inferSelect;
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
import { sql } from "drizzle-orm";
|
||||
import { db } from "./db.js";
|
||||
import { productoUrls } from "./schema.js";
|
||||
|
||||
export function saveUrls(urls: string[]) {
|
||||
db.transaction((tx) => {
|
||||
const now = new Date();
|
||||
const insertUrlTra = tx
|
||||
.insert(productoUrls)
|
||||
.values({
|
||||
url: sql.placeholder("url"),
|
||||
firstSeen: now,
|
||||
lastSeen: now,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: productoUrls.url,
|
||||
set: { lastSeen: now },
|
||||
})
|
||||
.prepare();
|
||||
|
||||
for (const href of urls) {
|
||||
insertUrlTra.run({ url: href });
|
||||
}
|
||||
});
|
||||
}
|
|
@ -1,8 +1,7 @@
|
|||
import pMap from "p-map";
|
||||
import { parseHTML } from "linkedom";
|
||||
import { getHtml } from "../scraper/fetch.js";
|
||||
import { saveUrls } from "db-datos/urlHelpers.js";
|
||||
|
||||
(async () => {
|
||||
const categorias = [
|
||||
"https://diaonline.supermercadosdia.com.ar/almacen",
|
||||
"https://diaonline.supermercadosdia.com.ar/almacen/conservas",
|
||||
|
@ -67,44 +66,29 @@ const categorias = [
|
|||
"https://diaonline.supermercadosdia.com.ar/2089?map=productClusterIds&order=OrderByBestDiscountDESC",
|
||||
];
|
||||
|
||||
export async function scrapDiaProducts() {
|
||||
await Promise.all([scrapBySite(), scrapBySitemap()]);
|
||||
}
|
||||
|
||||
async function scrapBySitemap() {
|
||||
// de https://diaonline.supermercadosdia.com.ar/sitemap.xml
|
||||
const sitemaps = [
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-1.xml",
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-2.xml",
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-3.xml",
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-4.xml",
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-5.xml",
|
||||
];
|
||||
|
||||
await pMap(sitemaps, async (sitemapUrl) => {
|
||||
const res = await fetch(sitemapUrl);
|
||||
const xml = await res.text();
|
||||
let urls = new Set<string>();
|
||||
new HTMLRewriter()
|
||||
.on("loc", {
|
||||
text(element) {
|
||||
const txt = element.text.trim();
|
||||
if (!txt) return;
|
||||
urls.add(txt);
|
||||
},
|
||||
})
|
||||
.transform(new Response(xml));
|
||||
saveUrls(Array.from(urls));
|
||||
});
|
||||
}
|
||||
|
||||
async function scrapBySite() {
|
||||
const links = categorias.flatMap((link) =>
|
||||
const links = categorias.flatMap(
|
||||
(link) =>
|
||||
Array.from({ length: 51 }, (x, i) => i).map((i) => {
|
||||
const url = new URL(link);
|
||||
url.searchParams.set("page", `${i}`);
|
||||
return url.toString();
|
||||
})
|
||||
|
||||
// el order solo carga con el frontend :(
|
||||
// .flatMap((link) =>
|
||||
// [
|
||||
// "OrderByNameASC",
|
||||
// "OrderByNameDESC",
|
||||
// "OrderByTopSaleDESC",
|
||||
// "OrderByPriceDESC",
|
||||
// "OrderByPriceASC",
|
||||
// "",
|
||||
// ].map((order) => {
|
||||
// const url = new URL(link);
|
||||
// url.searchParams.set("order", order);
|
||||
// return url.toString();
|
||||
// })
|
||||
// )
|
||||
);
|
||||
|
||||
await pMap(
|
||||
|
@ -119,8 +103,8 @@ async function scrapBySite() {
|
|||
),
|
||||
(a) => new URL(a.href, url).toString()
|
||||
);
|
||||
saveUrls(hrefs);
|
||||
hrefs.forEach((h) => process.stdout.write(h + "\n"));
|
||||
},
|
||||
{ concurrency: 32 }
|
||||
);
|
||||
}
|
||||
})();
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"linkedom": "^0.16.5",
|
||||
"p-map": "^7.0.0"
|
||||
"p-map": "^7.0.0",
|
||||
"tsx": "^4.7.0",
|
||||
"undici": "^6.2.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
"workspaces": [
|
||||
"dia-link-scraper",
|
||||
"coto-link-scraper",
|
||||
"carrefour-link-scraper",
|
||||
"scraper",
|
||||
"sitio",
|
||||
"db-datos"
|
||||
|
|
|
@ -1,20 +1,14 @@
|
|||
import { mkdtemp, access, writeFile } from "node:fs/promises";
|
||||
import { mkdtemp, access } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join, resolve } from "node:path";
|
||||
import { spawn } from "node:child_process";
|
||||
import { Supermercado, hosts } from "db-datos/supermercado.js";
|
||||
import { Supermercado } from "db-datos/supermercado.js";
|
||||
import PQueue from "p-queue";
|
||||
import { format, formatDuration, intervalToDuration } from "date-fns";
|
||||
import { parseWarc } from "./scrap.js";
|
||||
import { S3Client } from "@aws-sdk/client-s3";
|
||||
import { Upload } from "@aws-sdk/lib-storage";
|
||||
import { BunFile } from "bun";
|
||||
import { db } from "db-datos/db.js";
|
||||
import { like } from "drizzle-orm";
|
||||
import { productoUrls } from "db-datos/schema.js";
|
||||
import { scrapDiaProducts } from "../dia-link-scraper/index.js";
|
||||
import { scrapCotoProducts } from "../coto-link-scraper/index.js";
|
||||
import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js";
|
||||
|
||||
const supermercados: Supermercado[] = [
|
||||
Supermercado.Carrefour,
|
||||
|
@ -77,40 +71,11 @@ class Auto {
|
|||
}
|
||||
|
||||
async downloadList(supermercado: Supermercado) {
|
||||
const ctxPath = await mkdtemp(join(tmpdir(), "preciazo-scraper-wget-"));
|
||||
|
||||
let listPath: string;
|
||||
{
|
||||
const t0 = performance.now();
|
||||
switch (supermercado) {
|
||||
case "Dia":
|
||||
await scrapDiaProducts();
|
||||
break;
|
||||
case "Coto":
|
||||
await scrapCotoProducts();
|
||||
break;
|
||||
case "Carrefour":
|
||||
await scrapCarrefourProducts();
|
||||
break;
|
||||
}
|
||||
this.inform(
|
||||
`[scrapUrls[${supermercado}]] Tardó ${formatMs(performance.now() - t0)}`
|
||||
const listPath = resolve(
|
||||
join(process.env.LISTS_DIR ?? "../data", `${supermercado}.txt`)
|
||||
);
|
||||
}
|
||||
|
||||
listPath = join(ctxPath, `lista-${supermercado}.txt`);
|
||||
const host = Object.entries(hosts).find(
|
||||
([host, supe]) => supe === supermercado
|
||||
)![0];
|
||||
const results = await db.query.productoUrls
|
||||
.findMany({
|
||||
where: like(productoUrls.url, `%${host}%`),
|
||||
})
|
||||
.execute();
|
||||
const urls = results.map((r) => r.url);
|
||||
await writeFile(listPath, urls.join("\n") + "\n");
|
||||
|
||||
const date = new Date();
|
||||
const ctxPath = await mkdtemp(join(tmpdir(), "preciazo-scraper-wget-"));
|
||||
const zstdWarcName = `${supermercado}-${format(
|
||||
date,
|
||||
"yyyy-MM-dd-HH:mm"
|
||||
|
@ -133,7 +98,7 @@ class Auto {
|
|||
const t0 = performance.now();
|
||||
await subproc.exited;
|
||||
this.inform(
|
||||
`[wget] ${zstdWarcName} tardó ${formatMs(performance.now() - t0)}`
|
||||
`wget para ${zstdWarcName} tardó ${formatMs(performance.now() - t0)}`
|
||||
);
|
||||
|
||||
const gzippedWarcPath = join(ctxPath, "temp.warc.gz");
|
||||
|
@ -222,6 +187,7 @@ class Auto {
|
|||
stdio: ["pipe", null, null],
|
||||
}
|
||||
);
|
||||
// @ts-expect-error a los types de bun no le gusta????
|
||||
decompressor.stdout.pipe(compressor.stdin);
|
||||
compressor.on("close", (code) => {
|
||||
if (code !== 0) {
|
||||
|
|
|
@ -1,6 +1,32 @@
|
|||
import { request } from "undici";
|
||||
import { createBrotliDecompress, createUnzip } from "node:zlib";
|
||||
import { pipeline } from "node:stream/promises";
|
||||
|
||||
export async function getHtml(url: string) {
|
||||
const res = await fetch(url);
|
||||
return readableToBuffer(res.body!);
|
||||
const res = await request(url, {
|
||||
headers: {
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
},
|
||||
throwOnError: true,
|
||||
bodyTimeout: 10 * 60 * 1000,
|
||||
});
|
||||
let output: Buffer;
|
||||
switch (res.headers["content-encoding"]) {
|
||||
case "gzip":
|
||||
case "deflate":
|
||||
output = await pipeline(res.body, createUnzip(), readableToBuffer);
|
||||
break;
|
||||
case "br":
|
||||
output = await pipeline(
|
||||
res.body,
|
||||
createBrotliDecompress(),
|
||||
readableToBuffer
|
||||
);
|
||||
break;
|
||||
default:
|
||||
output = await readableToBuffer(res.body);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
async function readableToBuffer(source: AsyncIterable<any>) {
|
||||
|
|
|
@ -5,8 +5,7 @@
|
|||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"build:container": "podman build -t gitea.nulo.in/nulo/preciazo/scraper -f ./Containerfile ..",
|
||||
"push:container": "bun build:container && podman push gitea.nulo.in/nulo/preciazo/scraper"
|
||||
"build:container": "podman build -t gitea.nulo.in/nulo/preciazo/scraper -f ./Containerfile .."
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
|
@ -16,10 +15,11 @@
|
|||
"@aws-sdk/lib-storage": "^3.478.0",
|
||||
"date-fns": "^3.0.6",
|
||||
"db-datos": "workspace:^",
|
||||
"drizzle-orm": "=0.29.1",
|
||||
"drizzle-orm": "^0.29.1",
|
||||
"linkedom": "^0.16.5",
|
||||
"nanoid": "^5.0.4",
|
||||
"p-queue": "^8.0.1",
|
||||
"undici": "^6.2.0",
|
||||
"warcio": "^2.2.1",
|
||||
"zod": "^3.22.4"
|
||||
},
|
||||
|
|
|
@ -34,11 +34,10 @@ export function getCotoProduct(html: string | Buffer): Precioish {
|
|||
const ean = getEanFromText(dom);
|
||||
const precioCentavos = getPriceFromText(dom);
|
||||
|
||||
const name = dom.document
|
||||
.querySelector("h1.product_page")
|
||||
?.textContent?.trim();
|
||||
const imageUrl =
|
||||
dom.document.querySelector<HTMLImageElement>(".zoom img")?.src;
|
||||
const name = dom.document.querySelector("h1.product_page")?.textContent;
|
||||
const imageUrl = dom.document.querySelector<HTMLImageElement>(
|
||||
".productImageZoom img"
|
||||
)?.src;
|
||||
|
||||
return { name, imageUrl, ean, precioCentavos };
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import { Database } from "bun:sqlite";
|
||||
import { drizzle } from "drizzle-orm/bun-sqlite";
|
||||
import * as schema from "db-datos/schema.js";
|
||||
import { WARCParser } from "warcio";
|
||||
import { writeFile } from "fs/promises";
|
||||
|
@ -7,10 +9,16 @@ import { getDiaProduct } from "./parsers/dia.js";
|
|||
import { getCotoProduct } from "./parsers/coto.js";
|
||||
import { join } from "path";
|
||||
import { and, eq, sql } from "drizzle-orm";
|
||||
import { db } from "db-datos/db.js";
|
||||
import { DB_PATH } from "db-datos/drizzle.config.js";
|
||||
import { migrateDb } from "db-datos/migrate.js";
|
||||
|
||||
const DEBUG = false;
|
||||
const PARSER_VERSION = 4;
|
||||
const PARSER_VERSION = 2;
|
||||
|
||||
migrateDb();
|
||||
|
||||
const sqlite = new Database(DB_PATH);
|
||||
const db = drizzle(sqlite, { schema });
|
||||
|
||||
const getPrevPrecio = db
|
||||
.select({ id: schema.precios.id })
|
||||
|
|
|
@ -1,24 +1,7 @@
|
|||
FROM docker.io/oven/bun:1-alpine as build
|
||||
RUN apk add --no-cache nodejs
|
||||
WORKDIR /usr/src/app
|
||||
COPY . .
|
||||
WORKDIR /usr/src/app/sitio
|
||||
RUN bun install && \
|
||||
bun run build
|
||||
|
||||
# FROM docker.io/oven/bun:1-alpine as deps
|
||||
# WORKDIR /usr/src/app/sitio
|
||||
# RUN bun init && bun install "better-sqlite3"@"^9.2.2" "chart.js"@"^4.4.1" "chartjs-adapter-dayjs-4"@"^1.0.4" "dayjs"@"^1.11.10" "drizzle-orm"@"^0.29.1"
|
||||
# COPY --from=build /usr/src/app/db-datos node_modules/db-datos
|
||||
|
||||
FROM docker.io/alpine:3.19
|
||||
RUN apk add --no-cache tini nodejs npm jq
|
||||
|
||||
WORKDIR /app
|
||||
COPY --from=build /usr/src/app/sitio/package.json package.real.json
|
||||
RUN sh -c 'echo {\"name\":\"sitio\",\"type\":\"module\",\"dependencies\":$(jq .dependencies < package.real.json)} > package.json' && npm install
|
||||
COPY --from=build /usr/src/app/db-datos node_modules/db-datos
|
||||
COPY --from=build /usr/src/app/sitio/build .
|
||||
FROM docker.io/oven/bun:1-alpine
|
||||
COPY build/ .
|
||||
RUN bun i
|
||||
EXPOSE 3000
|
||||
|
||||
# https://github.com/gornostay25/svelte-adapter-bun/issues/39
|
||||
ENV PROTOCOL_HEADER=x-forwarded-proto
|
||||
|
@ -26,6 +9,5 @@ ENV HOST_HEADER=x-forwarded-host
|
|||
|
||||
VOLUME /db
|
||||
ENV DB_PATH=/db/db.db
|
||||
EXPOSE 3000
|
||||
|
||||
CMD ["tini", "node", "."]
|
||||
CMD ["bun", "run", "start"]
|
|
@ -5,7 +5,7 @@
|
|||
"scripts": {
|
||||
"dev": "vite dev",
|
||||
"build": "vite build",
|
||||
"build:container": "podman build -t gitea.nulo.in/nulo/preciazo/sitio -f ./Containerfile ..",
|
||||
"build:container": "bun --bun vite build && podman build -t gitea.nulo.in/nulo/preciazo/sitio .",
|
||||
"push:container": "bun build:container && podman push gitea.nulo.in/nulo/preciazo/sitio",
|
||||
"preview": "vite preview",
|
||||
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
||||
|
@ -16,6 +16,7 @@
|
|||
"devDependencies": {
|
||||
"@sveltejs/kit": "^2.0.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^3.0.0",
|
||||
"@types/bun": "^1.0.0",
|
||||
"autoprefixer": "^10.4.16",
|
||||
"db-datos": "workspace:^",
|
||||
"postcss": "^8.4.32",
|
||||
|
@ -24,21 +25,18 @@
|
|||
"prettier-plugin-svelte": "^3.1.2",
|
||||
"prettier-plugin-tailwindcss": "^0.5.9",
|
||||
"svelte": "^4.2.7",
|
||||
"svelte-adapter-bun": "^0.5.1",
|
||||
"svelte-check": "^3.6.0",
|
||||
"tailwindcss": "^3.3.6",
|
||||
"tslib": "^2.4.1",
|
||||
"typescript": "^5.0.0",
|
||||
"vite": "^5.0.3",
|
||||
"@sveltejs/adapter-node": "^2.0.2",
|
||||
"@types/better-sqlite3": "^7.6.8",
|
||||
"@types/node": "^20.10.6"
|
||||
"vite": "^5.0.3"
|
||||
},
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^9.2.2",
|
||||
"chart.js": "^4.4.1",
|
||||
"chartjs-adapter-dayjs-4": "^1.0.4",
|
||||
"dayjs": "^1.11.10",
|
||||
"drizzle-orm": "=0.29.1"
|
||||
"drizzle-orm": "^0.29.1"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,10 +6,7 @@
|
|||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
%sveltekit.head%
|
||||
</head>
|
||||
<body
|
||||
class="bg-neutral-100 dark:bg-neutral-900 dark:text-neutral-200"
|
||||
data-sveltekit-preload-data="hover"
|
||||
>
|
||||
<body data-sveltekit-preload-data="hover">
|
||||
<div style="display: contents">%sveltekit.body%</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
<script lang="ts">
|
||||
export let product: { ean: string; name: string; imageUrl: string };
|
||||
</script>
|
||||
|
||||
<a href={`/ean/${product.ean}`} class="flex">
|
||||
<img src={product.imageUrl} alt={product.name} class="max-h-48" />
|
||||
<p class="text-xl">{product.name}</p>
|
||||
</a>
|
|
@ -1,10 +1,9 @@
|
|||
import Database from "better-sqlite3";
|
||||
import { drizzle } from "drizzle-orm/better-sqlite3";
|
||||
import Database from "bun:sqlite";
|
||||
import { drizzle } from "drizzle-orm/bun-sqlite";
|
||||
import * as schema from "db-datos/schema.js";
|
||||
import { env } from "$env/dynamic/private";
|
||||
|
||||
const sqlite = new Database(env.DB_PATH ?? "../scraper/sqlite.db");
|
||||
const db = drizzle(sqlite, { schema });
|
||||
|
||||
export { db };
|
||||
export const db = drizzle(sqlite, { schema });
|
||||
export * as schema from "db-datos/schema.js";
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
import { countDistinct } from "drizzle-orm";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
|
||||
export const load: PageServerLoad = async () => {
|
||||
const nProductosR = await db
|
||||
.select({
|
||||
count: countDistinct(precios.ean),
|
||||
})
|
||||
.from(precios);
|
||||
const nProductos = nProductosR[0].count;
|
||||
return { nProductos };
|
||||
};
|
|
@ -1,43 +1,5 @@
|
|||
<script lang="ts">
|
||||
<script>
|
||||
import "../app.pcss";
|
||||
|
||||
import type { PageData } from "./$types";
|
||||
|
||||
export let data: PageData;
|
||||
</script>
|
||||
|
||||
<!-- https://flowbite.com/docs/forms/search-input/ -->
|
||||
<form method="GET" action="/search">
|
||||
<div class="flex items-stretch p-4">
|
||||
<input
|
||||
type="search"
|
||||
name="q"
|
||||
class="block w-full rounded-l-lg border border-gray-300 bg-gray-50 p-2.5 text-sm text-gray-900 focus:border-blue-500 focus:ring-blue-500 dark:border-gray-600 dark:bg-gray-700 dark:text-white dark:placeholder-gray-400 dark:focus:border-blue-500"
|
||||
placeholder={`Buscar entre ${data.nProductos} productos`}
|
||||
required
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
class="block rounded-e-lg border border-blue-700 bg-blue-700 p-2.5 text-sm font-medium text-white hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800"
|
||||
>
|
||||
<svg
|
||||
class="h-4 w-4"
|
||||
aria-hidden="true"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 20 20"
|
||||
>
|
||||
<path
|
||||
stroke="currentColor"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
stroke-width="2"
|
||||
d="m19 19-4-4m0-7A7 7 0 1 1 1 8a7 7 0 0 1 14 0Z"
|
||||
/>
|
||||
</svg>
|
||||
<span class="sr-only">Search</span>
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<slot />
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import { error } from "@sveltejs/kit";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
|
@ -5,11 +6,7 @@ import { sql } from "drizzle-orm";
|
|||
|
||||
export const load: PageServerLoad = async ({ params }) => {
|
||||
const q = db
|
||||
.select({
|
||||
ean: precios.ean,
|
||||
name: precios.name,
|
||||
imageUrl: precios.imageUrl,
|
||||
})
|
||||
.select({ ean: precios.ean, name: precios.name })
|
||||
.from(precios)
|
||||
.groupBy(precios.ean)
|
||||
.having(sql`max(length(name))`)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
<script lang="ts">
|
||||
import ProductPreview from "$lib/ProductPreview.svelte";
|
||||
import type { PageData } from "./$types";
|
||||
|
||||
export let data: PageData;
|
||||
|
@ -31,10 +30,12 @@
|
|||
|
||||
<section>
|
||||
<h2 class="text-lg font-bold">Random</h2>
|
||||
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
|
||||
<ul>
|
||||
{#each data.precios as product}
|
||||
<li>
|
||||
<ProductPreview {product} />
|
||||
<a href={`/ean/${product.ean}`}>
|
||||
{product.name}
|
||||
</a>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
{#if data.meta}
|
||||
<h1 class="text-3xl font-bold">{data.meta.name}</h1>
|
||||
<img src={data.meta.imageUrl} alt={data.meta.name} class="max-h-48" />
|
||||
<img src={data.meta.imageUrl} class="max-h-48" />
|
||||
<div class="flex gap-2">
|
||||
{#each urls as [supermercado, url]}
|
||||
<a
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
}
|
||||
</script>
|
||||
|
||||
<div class="h-[300px] w-full min-w-[500px] bg-neutral-200 dark:invert">
|
||||
<div class="h-[300px] w-full min-w-[500px]">
|
||||
<ChartJs
|
||||
type="line"
|
||||
data={{ datasets }}
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
import { error } from "@sveltejs/kit";
|
||||
import { eq, max, sql } from "drizzle-orm";
|
||||
import type { PageServerLoad } from "./$types";
|
||||
import { db, schema } from "$lib/server/db";
|
||||
const { precios } = schema;
|
||||
|
||||
export const load: PageServerLoad = async ({ url }) => {
|
||||
const query = url.searchParams.get("q");
|
||||
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
|
||||
if (query) {
|
||||
results = db.all(
|
||||
sql`select p.ean, p.name, p.image_url as imageUrl from precios_fts f
|
||||
join precios p on p.ean = f.ean
|
||||
where f.name match ${query};`,
|
||||
);
|
||||
}
|
||||
|
||||
return { query, results };
|
||||
};
|
|
@ -1,21 +0,0 @@
|
|||
<script lang="ts">
|
||||
import ProductPreview from "$lib/ProductPreview.svelte";
|
||||
import type { PageData } from "./$types";
|
||||
|
||||
export let data: PageData;
|
||||
</script>
|
||||
|
||||
{#if data.results}
|
||||
<header class="my-2">
|
||||
<h1 class="text-2xl font-bold">Resultados para "{data.query}"</h1>
|
||||
</header>
|
||||
<ul class="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
|
||||
{#each data.results as product}
|
||||
<li>
|
||||
<ProductPreview {product} />
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{:else}
|
||||
Probá buscando algo.
|
||||
{/if}
|
|
@ -1,5 +1,5 @@
|
|||
import adapter from "@sveltejs/adapter-node";
|
||||
// import adapter from "svelte-adapter-bun";
|
||||
// import adapter from "@sveltejs/adapter-node";
|
||||
import adapter from "svelte-adapter-bun";
|
||||
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
|
||||
|
|
Loading…
Reference in a new issue