mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 11:36:20 +00:00
scrapear coto
This commit is contained in:
parent
f8d05e71f8
commit
b8276ce7fd
3 changed files with 45 additions and 3 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -5,4 +5,5 @@ p
|
||||||
node_modules/
|
node_modules/
|
||||||
*.db
|
*.db
|
||||||
scraper/debug/
|
scraper/debug/
|
||||||
scraper/x.tsv
|
scraper/x.tsv
|
||||||
|
*.tmp
|
38
scraper/coto.ts
Normal file
38
scraper/coto.ts
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import { parseHTML } from "linkedom";
|
||||||
|
import { type Precioish } from "./scrap.js";
|
||||||
|
|
||||||
|
/**
 * Extracts the product EAN from a parsed product page.
 *
 * Picks the first `div#brandText` whose text contains `"| EAN: "`, then reads
 * the trimmed text of the second `span.span_codigoplu` found inside it.
 *
 * @param window - Parsed page; only its `document` is used.
 * @returns The EAN as a non-empty string.
 * @throws Error if no matching `div#brandText` exists, or if the second
 *   `span.span_codigoplu` is missing or has empty text.
 */
function getEanFromText({ document }: Window) {
  let eanContainer: Element | undefined;
  for (const candidate of Array.from(
    document.querySelectorAll("div#brandText")
  )) {
    if (candidate.textContent?.includes("| EAN: ")) {
      eanContainer = candidate;
      break;
    }
  }
  if (eanContainer === undefined) throw new Error("no encuentro el eanparent");

  // The code of interest is the second span with this class (index 1).
  const codeSpans = Array.from(
    eanContainer.querySelectorAll("span.span_codigoplu")
  );
  const ean = codeSpans[1]?.textContent?.trim();
  if (!ean) throw new Error("no encuentro el ean");
  return ean;
}
|
||||||
|
/**
 * Extracts the product price, in integer centavos, from a parsed product page.
 *
 * Reads the text of the first `.atg_store_newPrice` element, drops the first
 * `"$"`, removes every `"."` (treated as a thousands separator) and turns the
 * first `","` into a decimal point before parsing.
 *
 * @param window - Parsed page; only its `document` is used.
 * @returns The price multiplied by 100 and rounded to the nearest integer.
 * @throws Error if the price element is missing or its text is not a number.
 */
function getPriceFromText({ document }: Window) {
  const el = document.querySelector(".atg_store_newPrice");
  if (!el) throw new Error("no encuentro el precio");

  const rawText = (el.textContent ?? "").trim();
  const nStr = rawText
    .replace("$", "")
    .replace(/\./g, "")
    .replace(",", ".");

  const price = parseFloat(nStr);
  // Fail loudly instead of letting NaN flow into the stored price.
  if (!Number.isFinite(price)) {
    throw new Error(`no pude parsear el precio: "${rawText}"`);
  }

  // Round: binary floating point makes e.g. 19.99 * 100 === 1998.9999999999998,
  // and centavos must be a whole number.
  return Math.round(price * 100);
}
|
||||||
|
|
||||||
|
/**
 * Builds a price record from the HTML of a product page.
 *
 * Parses the markup with `parseHTML`, then reads the EAN first and the price
 * second using the helpers in this file.
 *
 * @param html - Raw product page markup.
 * @returns An object with the product's `ean` and `precioCentavos`.
 * @throws Error propagated from the helpers when the EAN or the price cannot
 *   be found in the page.
 */
export function getCotoProduct(html: string | Buffer): Precioish {
  const page = parseHTML(html);
  return {
    ean: getEanFromText(page),
    precioCentavos: getPriceFromText(page),
  };
}
|
|
@ -11,6 +11,7 @@ import { createHash } from "crypto";
|
||||||
import { migrate } from "drizzle-orm/bun-sqlite/migrator";
|
import { migrate } from "drizzle-orm/bun-sqlite/migrator";
|
||||||
import { getCarrefourProduct } from "./carrefour.js";
|
import { getCarrefourProduct } from "./carrefour.js";
|
||||||
import { getDiaProduct } from "./dia.js";
|
import { getDiaProduct } from "./dia.js";
|
||||||
|
import { getCotoProduct } from "./coto.js";
|
||||||
import { join } from "path";
|
import { join } from "path";
|
||||||
|
|
||||||
const sqlite = new Database("sqlite.db");
|
const sqlite = new Database("sqlite.db");
|
||||||
|
@ -19,7 +20,7 @@ const db = drizzle(sqlite);
|
||||||
const DEBUG = true;
|
const DEBUG = true;
|
||||||
|
|
||||||
export type Precio = typeof precios.$inferInsert;
|
export type Precio = typeof precios.$inferInsert;
|
||||||
export type Precioish = Omit<Precio, "fetchedAt" | "url">;
|
export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">;
|
||||||
|
|
||||||
async function storePrecioPoint(point: Precio) {
|
async function storePrecioPoint(point: Precio) {
|
||||||
await db.insert(precios).values(point);
|
await db.insert(precios).values(point);
|
||||||
|
@ -45,7 +46,9 @@ async function storePrecioPoint(point: Precio) {
|
||||||
ish = getCarrefourProduct(html);
|
ish = getCarrefourProduct(html);
|
||||||
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
|
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
|
||||||
ish = getDiaProduct(html);
|
ish = getDiaProduct(html);
|
||||||
else console.error(`Unknown host ${url.hostname}`);
|
else if (url.hostname === "www.cotodigital3.com.ar")
|
||||||
|
ish = getCotoProduct(html);
|
||||||
|
else throw new Error(`Unknown host ${url.hostname}`);
|
||||||
|
|
||||||
const p: Precio = {
|
const p: Precio = {
|
||||||
...ish,
|
...ish,
|
||||||
|
|
Loading…
Reference in a new issue