From da9f2c83488dbd8213b2e8ef414af6615cbe2ce5 Mon Sep 17 00:00:00 2001 From: Nulo Date: Thu, 4 Jan 2024 17:48:27 -0300 Subject: [PATCH] cli: poder scrappear links especificos --- scraper/cli.ts | 7 ++++++- scraper/scrap.ts | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/scraper/cli.ts b/scraper/cli.ts index 3e8af80..b68bda7 100644 --- a/scraper/cli.ts +++ b/scraper/cli.ts @@ -2,7 +2,7 @@ import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js"; import { scrapCotoProducts } from "../coto-link-scraper/index.js"; import { scrapDiaProducts } from "../dia-link-scraper/index.js"; import { auto } from "./auto.js"; -import { downloadList } from "./scrap.js"; +import { downloadList, getProduct } from "./scrap.js"; if (process.argv[2] === "auto") { await auto(); @@ -12,6 +12,11 @@ if (process.argv[2] === "auto") { await scrapDiaProducts(); } else if (process.argv[2] === "scrap-coto-links") { await scrapCotoProducts(); +} else if (process.argv[2] === "scrap-link") { + const url = new URL(process.argv[3]); + const res = await fetch(url); + const text = await res.text(); + console.info(getProduct(url, text)); } else if (process.argv[2] === "scrap") { const urlLists = process.argv.slice(3); if (urlLists.length > 0) { diff --git a/scraper/scrap.ts b/scraper/scrap.ts index 8035a13..f482492 100644 --- a/scraper/scrap.ts +++ b/scraper/scrap.ts @@ -60,6 +60,15 @@ export async function downloadList(path: string) { return progress; } +export function getProduct(url: URL, html: string) { + if (url.hostname === "www.carrefour.com.ar") return getCarrefourProduct(html); + else if (url.hostname === "diaonline.supermercadosdia.com.ar") + return getDiaProduct(html); + else if (url.hostname === "www.cotodigital3.com.ar") + return getCotoProduct(html); + else throw new Error(`Unknown host ${url.hostname}`); +} + type ScrapResult = | { type: "skipped" } | { type: "done" } @@ -81,14 +90,7 @@ async function scrap(urlS: string): Promise { const html = await res.text(); try { - let ish: Precioish | undefined = undefined; - if (url.hostname === "www.carrefour.com.ar") - ish = getCarrefourProduct(html); - else if (url.hostname === "diaonline.supermercadosdia.com.ar") - ish = getDiaProduct(html); - else if (url.hostname === "www.cotodigital3.com.ar") - ish = getCotoProduct(html); - else throw new Error(`Unknown host ${url.hostname}`); + let ish = getProduct(url, html); const p: Precio = { ...ish,