cli: poder scrappear links específicos

This commit is contained in:
Cat /dev/Nulo 2024-01-04 17:48:27 -03:00
parent e6f084b1da
commit da9f2c8348
2 changed files with 16 additions and 9 deletions

View file

@ -2,7 +2,7 @@ import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js";
import { scrapCotoProducts } from "../coto-link-scraper/index.js"; import { scrapCotoProducts } from "../coto-link-scraper/index.js";
import { scrapDiaProducts } from "../dia-link-scraper/index.js"; import { scrapDiaProducts } from "../dia-link-scraper/index.js";
import { auto } from "./auto.js"; import { auto } from "./auto.js";
import { downloadList } from "./scrap.js"; import { downloadList, getProduct } from "./scrap.js";
if (process.argv[2] === "auto") { if (process.argv[2] === "auto") {
await auto(); await auto();
@ -12,6 +12,11 @@ if (process.argv[2] === "auto") {
await scrapDiaProducts(); await scrapDiaProducts();
} else if (process.argv[2] === "scrap-coto-links") { } else if (process.argv[2] === "scrap-coto-links") {
await scrapCotoProducts(); await scrapCotoProducts();
} else if (process.argv[2] === "scrap-link") {
const url = new URL(process.argv[3]);
const res = await fetch(url);
const text = await res.text();
console.info(getProduct(url, text));
} else if (process.argv[2] === "scrap") { } else if (process.argv[2] === "scrap") {
const urlLists = process.argv.slice(3); const urlLists = process.argv.slice(3);
if (urlLists.length > 0) { if (urlLists.length > 0) {

View file

@ -60,6 +60,15 @@ export async function downloadList(path: string) {
return progress; return progress;
} }
/**
 * Dispatches a downloaded product page to the parser of the store it came
 * from, chosen by the exact hostname of the page's URL.
 *
 * @param url - URL the page was fetched from; only `hostname` is inspected.
 * @param html - HTML text of the product page, passed unchanged to the parser.
 * @returns Whatever the matched store parser returns for `html`.
 * @throws Error with message `Unknown host <hostname>` when the hostname is
 *   not one of the three hosts handled below.
 */
export function getProduct(url: URL, html: string) {
  const { hostname } = url;
  switch (hostname) {
    case "www.carrefour.com.ar":
      return getCarrefourProduct(html);
    case "diaonline.supermercadosdia.com.ar":
      return getDiaProduct(html);
    case "www.cotodigital3.com.ar":
      return getCotoProduct(html);
    default:
      // Matching is exact: a known store on a different subdomain lands here too.
      throw new Error(`Unknown host ${hostname}`);
  }
}
type ScrapResult = type ScrapResult =
| { type: "skipped" } | { type: "skipped" }
| { type: "done" } | { type: "done" }
@ -81,14 +90,7 @@ async function scrap(urlS: string): Promise<ScrapResult> {
const html = await res.text(); const html = await res.text();
try { try {
let ish: Precioish | undefined = undefined; let ish = getProduct(url, html);
if (url.hostname === "www.carrefour.com.ar")
ish = getCarrefourProduct(html);
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
ish = getDiaProduct(html);
else if (url.hostname === "www.cotodigital3.com.ar")
ish = getCotoProduct(html);
else throw new Error(`Unknown host ${url.hostname}`);
const p: Precio = { const p: Precio = {
...ish, ...ish,