scrapear coto

This commit is contained in:
Cat /dev/Nulo 2023-12-22 16:33:44 -03:00
parent f8d05e71f8
commit b8276ce7fd
3 changed files with 45 additions and 3 deletions

3
.gitignore vendored
View file

@ -5,4 +5,5 @@ p
node_modules/ node_modules/
*.db *.db
scraper/debug/ scraper/debug/
scraper/x.tsv scraper/x.tsv
*.tmp

38
scraper/coto.ts Normal file
View file

@ -0,0 +1,38 @@
import { parseHTML } from "linkedom";
import { type Precioish } from "./scrap.js";
/**
 * Reads the product EAN out of a parsed product page.
 *
 * Picks the first `div#brandText` whose text contains "| EAN: " and returns
 * the trimmed text of the second `span.span_codigoplu` found inside it.
 *
 * @returns The EAN as a non-empty string.
 * @throws Error if no matching `div#brandText` exists, or if the second
 *   `span.span_codigoplu` is missing or has no text.
 */
function getEanFromText({ document }: Window) {
  const brandBlocks = Array.from(document.querySelectorAll("div#brandText"));
  const holder = brandBlocks.find((block) => {
    const text = block.textContent;
    return text?.includes("| EAN: ");
  });
  if (!holder) throw new Error("no encuentro el eanparent");

  // The value is taken from the second matching span (index 1).
  const codeSpans = Array.from(holder.querySelectorAll("span.span_codigoplu"));
  const ean = codeSpans[1]?.textContent?.trim();
  if (!ean) throw new Error("no encuentro el ean");
  return ean;
}
/**
 * Extracts the product price, in centavos, from a parsed product page.
 *
 * Reads the text of the first `.atg_store_newPrice` element, drops the `$`
 * sign, removes every `.` (treated as a grouping separator) and turns the
 * first `,` into the decimal point before parsing.
 *
 * @returns The parsed price multiplied by 100, rounded to the nearest integer.
 * @throws Error if the price element is missing or its text does not parse
 *   as a number.
 */
function getPriceFromText({ document }: Window) {
  const el = document.querySelector(".atg_store_newPrice");
  if (!el) throw new Error("no encuentro el precio");
  const nStr = (el.textContent ?? "")
    .trim()
    .replace("$", "")
    .replace(/\./g, "")
    .replace(",", ".");
  const price = parseFloat(nStr);
  // Fail loudly instead of letting NaN flow into the returned price.
  if (Number.isNaN(price)) {
    throw new Error(`no puedo parsear el precio: "${nStr}"`);
  }
  // Round because binary floats make e.g. 19.99 * 100 evaluate to
  // 1998.9999999999998, which is not a whole number of centavos.
  return Math.round(price * 100);
}
/**
 * Parses a product page and collects its EAN and price.
 *
 * @param html - Raw page markup, handed to `parseHTML` as-is.
 * @returns An object with `ean` (from `getEanFromText`) and `precioCentavos`
 *   (from `getPriceFromText`).
 * @throws Error propagated from either helper when the expected element is
 *   not found in the page.
 */
export function getCotoProduct(html: string | Buffer): Precioish {
  const page = parseHTML(html);
  // Property order keeps the EAN lookup running before the price lookup.
  return {
    ean: getEanFromText(page),
    precioCentavos: getPriceFromText(page),
  };
}

View file

@ -11,6 +11,7 @@ import { createHash } from "crypto";
import { migrate } from "drizzle-orm/bun-sqlite/migrator"; import { migrate } from "drizzle-orm/bun-sqlite/migrator";
import { getCarrefourProduct } from "./carrefour.js"; import { getCarrefourProduct } from "./carrefour.js";
import { getDiaProduct } from "./dia.js"; import { getDiaProduct } from "./dia.js";
import { getCotoProduct } from "./coto.js";
import { join } from "path"; import { join } from "path";
const sqlite = new Database("sqlite.db"); const sqlite = new Database("sqlite.db");
@ -19,7 +20,7 @@ const db = drizzle(sqlite);
const DEBUG = true; const DEBUG = true;
export type Precio = typeof precios.$inferInsert; export type Precio = typeof precios.$inferInsert;
export type Precioish = Omit<Precio, "fetchedAt" | "url">; export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">;
async function storePrecioPoint(point: Precio) { async function storePrecioPoint(point: Precio) {
await db.insert(precios).values(point); await db.insert(precios).values(point);
@ -45,7 +46,9 @@ async function storePrecioPoint(point: Precio) {
ish = getCarrefourProduct(html); ish = getCarrefourProduct(html);
else if (url.hostname === "diaonline.supermercadosdia.com.ar") else if (url.hostname === "diaonline.supermercadosdia.com.ar")
ish = getDiaProduct(html); ish = getDiaProduct(html);
else console.error(`Unknown host ${url.hostname}`); else if (url.hostname === "www.cotodigital3.com.ar")
ish = getCotoProduct(html);
else throw new Error(`Unknown host ${url.hostname}`);
const p: Precio = { const p: Precio = {
...ish, ...ish,