/**
 * Extracts the product's EAN from a parsed Coto product page.
 *
 * Looks for the `div#brandText` element whose text contains `"| EAN: "` and
 * reads the second `span.span_codigoplu` inside it.
 *
 * @returns The trimmed text of that span.
 * @throws Error if no matching `div#brandText` exists, or if the second span
 *   is missing or empty.
 */
function getEanFromText({ document }: Window) {
  const potentialEanEls = Array.from(
    document.querySelectorAll("div#brandText")
  );
  const eanParent = potentialEanEls.find(
    (el) => el.textContent?.includes("| EAN: ")
  );
  if (!eanParent) throw new Error("no encuentro el eanparent");

  // Index 1: the EAN is taken from the second matching span.
  // NOTE(review): the first span presumably holds the PLU code — confirm
  // against a real page.
  const eanEl = Array.from(
    eanParent.querySelectorAll("span.span_codigoplu")
  )[1];
  const ean = eanEl?.textContent?.trim();
  if (!ean) throw new Error("no encuentro el ean");
  return ean;
}

/**
 * Extracts the product's price, in integer centavos, from a parsed Coto
 * product page.
 *
 * The text of `.atg_store_newPrice` is expected to look like `$1.234,56`
 * (`.` as thousands separator, `,` as decimal separator).
 *
 * @returns The price multiplied by 100 and rounded to the nearest integer.
 * @throws Error if the price element is missing or empty, or if its text
 *   cannot be parsed as a number.
 */
function getPriceFromText({ document }: Window) {
  const el = document.querySelector(".atg_store_newPrice");
  if (!el) throw new Error("no encuentro el precio");

  const raw = el.textContent?.trim();
  if (!raw) throw new Error("no encuentro el precio");

  const nStr = raw
    .replace("$", "")
    // Drop every thousands separator (global regex instead of `replaceAll`,
    // which requires ES2021).
    .replace(/\./g, "")
    // Convert the decimal comma into the dot `parseFloat` understands.
    .replace(",", ".");

  const pesos = parseFloat(nStr);
  if (!Number.isFinite(pesos))
    throw new Error(`no puedo interpretar el precio: ${raw}`);

  // Binary floating point makes e.g. 19.99 * 100 === 1998.9999999999998;
  // round so the result is an exact integer number of centavos.
  return Math.round(pesos * 100);
}

/**
 * Parses the HTML of a Coto product page and returns its EAN and price.
 *
 * @param html Raw HTML of the product page.
 * @returns The EAN and the price in centavos.
 * @throws Error if the EAN or the price cannot be located or parsed.
 */
export function getCotoProduct(html: string | Buffer): Precioish {
  const dom = parseHTML(html);

  const ean = getEanFromText(dom);
  const precioCentavos = getPriceFromText(dom);

  return { ean, precioCentavos };
}
import { createHash } from "crypto"; import { migrate } from "drizzle-orm/bun-sqlite/migrator"; import { getCarrefourProduct } from "./carrefour.js"; import { getDiaProduct } from "./dia.js"; +import { getCotoProduct } from "./coto.js"; import { join } from "path"; const sqlite = new Database("sqlite.db"); @@ -19,7 +20,7 @@ const db = drizzle(sqlite); const DEBUG = true; export type Precio = typeof precios.$inferInsert; -export type Precioish = Omit; +export type Precioish = Omit; async function storePrecioPoint(point: Precio) { await db.insert(precios).values(point); @@ -45,7 +46,9 @@ async function storePrecioPoint(point: Precio) { ish = getCarrefourProduct(html); else if (url.hostname === "diaonline.supermercadosdia.com.ar") ish = getDiaProduct(html); - else console.error(`Unknown host ${url.hostname}`); + else if (url.hostname === "www.cotodigital3.com.ar") + ish = getCotoProduct(html); + else throw new Error(`Unknown host ${url.hostname}`); const p: Precio = { ...ish,