mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 19:46:19 +00:00
scrapear coto
This commit is contained in:
parent
f8d05e71f8
commit
b8276ce7fd
3 changed files with 45 additions and 3 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -5,4 +5,5 @@ p
|
|||
node_modules/
|
||||
*.db
|
||||
scraper/debug/
|
||||
scraper/x.tsv
|
||||
scraper/x.tsv
|
||||
*.tmp
|
38
scraper/coto.ts
Normal file
38
scraper/coto.ts
Normal file
|
@ -0,0 +1,38 @@
|
|||
import { parseHTML } from "linkedom";
|
||||
import { type Precioish } from "./scrap.js";
|
||||
|
||||
/**
 * Extracts the product EAN from a parsed Coto product page.
 *
 * Looks through every `div#brandText` element for the first one whose text
 * contains the "| EAN: " marker, then reads the second
 * `span.span_codigoplu` found inside that element.
 *
 * @param window - Parsed page; only its `document` is used.
 * @returns The trimmed text content of the EAN span.
 * @throws Error when no element carries the marker, or when the second span
 *   is missing or has empty text.
 */
function getEanFromText({ document }: Window) {
  let brandBlock: Element | undefined;
  for (const candidate of Array.from(
    document.querySelectorAll("div#brandText")
  )) {
    if (candidate.textContent?.includes("| EAN: ")) {
      brandBlock = candidate;
      break;
    }
  }
  if (brandBlock === undefined) throw new Error("no encuentro el eanparent");

  // The first matching span is skipped; the value is read from the second one.
  const [, codeSpan] = Array.from(
    brandBlock.querySelectorAll("span.span_codigoplu")
  );
  const code = codeSpan?.textContent?.trim();
  if (!code) throw new Error("no encuentro el ean");
  return code;
}
|
||||
/**
 * Reads the product price from a parsed Coto product page and returns it as
 * an integer number of centavos.
 *
 * The text of the first `.atg_store_newPrice` element is normalized by
 * dropping the "$" sign, removing every "." and turning the first "," into a
 * decimal point (i.e. it is treated as "$1.234,56"-style formatting).
 *
 * @param window - Parsed page; only its `document` is used.
 * @returns The price multiplied by 100 and rounded to the nearest integer.
 * @throws Error when the price element is missing or its text does not parse
 *   to a finite number.
 */
function getPriceFromText({ document }: Window) {
  const el = document.querySelector(".atg_store_newPrice");
  if (!el) throw new Error("no encuentro el precio");

  const raw = (el.textContent ?? "").trim();
  const normalized = raw
    .replace("$", "")
    .replace(/\./g, "")
    .replace(",", ".");

  const pesos = parseFloat(normalized);
  // Fail loudly instead of letting NaN/Infinity flow into stored prices.
  if (!Number.isFinite(pesos)) {
    throw new Error(`no puedo parsear el precio: ${JSON.stringify(raw)}`);
  }

  // Binary floating point makes e.g. 19.99 * 100 === 1998.9999999999998;
  // rounding yields the intended whole number of centavos.
  return Math.round(pesos * 100);
}
|
||||
|
||||
/**
 * Parses the HTML of a Coto product page and extracts its EAN and price.
 *
 * @param html - Raw page markup, handed to linkedom's `parseHTML`.
 * @returns The EAN together with the price as returned by `getPriceFromText`.
 * @throws Error propagated from `getEanFromText` / `getPriceFromText` when the
 *   expected elements are not found.
 */
export function getCotoProduct(html: string | Buffer): Precioish {
  const page = parseHTML(html);
  // Property order keeps the original evaluation order: EAN first, then price.
  return {
    ean: getEanFromText(page),
    precioCentavos: getPriceFromText(page),
  };
}
|
|
@ -11,6 +11,7 @@ import { createHash } from "crypto";
|
|||
import { migrate } from "drizzle-orm/bun-sqlite/migrator";
|
||||
import { getCarrefourProduct } from "./carrefour.js";
|
||||
import { getDiaProduct } from "./dia.js";
|
||||
import { getCotoProduct } from "./coto.js";
|
||||
import { join } from "path";
|
||||
|
||||
const sqlite = new Database("sqlite.db");
|
||||
|
@ -19,7 +20,7 @@ const db = drizzle(sqlite);
|
|||
const DEBUG = true;
|
||||
|
||||
export type Precio = typeof precios.$inferInsert;
|
||||
export type Precioish = Omit<Precio, "fetchedAt" | "url">;
|
||||
export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">;
|
||||
|
||||
async function storePrecioPoint(point: Precio) {
|
||||
await db.insert(precios).values(point);
|
||||
|
@ -45,7 +46,9 @@ async function storePrecioPoint(point: Precio) {
|
|||
ish = getCarrefourProduct(html);
|
||||
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
|
||||
ish = getDiaProduct(html);
|
||||
else console.error(`Unknown host ${url.hostname}`);
|
||||
else if (url.hostname === "www.cotodigital3.com.ar")
|
||||
ish = getCotoProduct(html);
|
||||
else throw new Error(`Unknown host ${url.hostname}`);
|
||||
|
||||
const p: Precio = {
|
||||
...ish,
|
||||
|
|
Loading…
Reference in a new issue