mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-29 21:16:19 +00:00
cli: poder scrappear links especificos
This commit is contained in:
parent
e6f084b1da
commit
da9f2c8348
2 changed files with 16 additions and 9 deletions
|
@ -2,7 +2,7 @@ import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js";
|
||||||
import { scrapCotoProducts } from "../coto-link-scraper/index.js";
|
import { scrapCotoProducts } from "../coto-link-scraper/index.js";
|
||||||
import { scrapDiaProducts } from "../dia-link-scraper/index.js";
|
import { scrapDiaProducts } from "../dia-link-scraper/index.js";
|
||||||
import { auto } from "./auto.js";
|
import { auto } from "./auto.js";
|
||||||
import { downloadList } from "./scrap.js";
|
import { downloadList, getProduct } from "./scrap.js";
|
||||||
|
|
||||||
if (process.argv[2] === "auto") {
|
if (process.argv[2] === "auto") {
|
||||||
await auto();
|
await auto();
|
||||||
|
@ -12,6 +12,11 @@ if (process.argv[2] === "auto") {
|
||||||
await scrapDiaProducts();
|
await scrapDiaProducts();
|
||||||
} else if (process.argv[2] === "scrap-coto-links") {
|
} else if (process.argv[2] === "scrap-coto-links") {
|
||||||
await scrapCotoProducts();
|
await scrapCotoProducts();
|
||||||
|
} else if (process.argv[2] === "scrap-link") {
|
||||||
|
const url = new URL(process.argv[3]);
|
||||||
|
const res = await fetch(url);
|
||||||
|
const text = await res.text();
|
||||||
|
console.info(getProduct(url, text));
|
||||||
} else if (process.argv[2] === "scrap") {
|
} else if (process.argv[2] === "scrap") {
|
||||||
const urlLists = process.argv.slice(3);
|
const urlLists = process.argv.slice(3);
|
||||||
if (urlLists.length > 0) {
|
if (urlLists.length > 0) {
|
||||||
|
|
|
@ -60,6 +60,15 @@ export async function downloadList(path: string) {
|
||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Picks the store-specific product parser by exact match on `url.hostname`
 * and returns whatever that parser produces for `html`.
 *
 * Recognised hostnames: `www.carrefour.com.ar`,
 * `diaonline.supermercadosdia.com.ar`, and `www.cotodigital3.com.ar`.
 *
 * @param url - Product page URL; only its `hostname` is read here.
 * @param html - Page body, passed unchanged to the selected parser.
 * @returns The selected parser's result (presumably a `Precioish`, as the
 *   inline code this replaced in `scrap` typed it; confirm against the parsers).
 * @throws Error with message `Unknown host <hostname>` when the hostname is
 *   not one of the three listed above.
 */
export function getProduct(url: URL, html: string) {
|
||||||
|
if (url.hostname === "www.carrefour.com.ar") return getCarrefourProduct(html);
|
||||||
|
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
|
||||||
|
return getDiaProduct(html);
|
||||||
|
else if (url.hostname === "www.cotodigital3.com.ar")
|
||||||
|
return getCotoProduct(html);
|
||||||
|
else throw new Error(`Unknown host ${url.hostname}`);
|
||||||
|
}
|
||||||
|
|
||||||
type ScrapResult =
|
type ScrapResult =
|
||||||
| { type: "skipped" }
|
| { type: "skipped" }
|
||||||
| { type: "done" }
|
| { type: "done" }
|
||||||
|
@ -81,14 +90,7 @@ async function scrap(urlS: string): Promise<ScrapResult> {
|
||||||
const html = await res.text();
|
const html = await res.text();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let ish: Precioish | undefined = undefined;
|
let ish = getProduct(url, html);
|
||||||
if (url.hostname === "www.carrefour.com.ar")
|
|
||||||
ish = getCarrefourProduct(html);
|
|
||||||
else if (url.hostname === "diaonline.supermercadosdia.com.ar")
|
|
||||||
ish = getDiaProduct(html);
|
|
||||||
else if (url.hostname === "www.cotodigital3.com.ar")
|
|
||||||
ish = getCotoProduct(html);
|
|
||||||
else throw new Error(`Unknown host ${url.hostname}`);
|
|
||||||
|
|
||||||
const p: Precio = {
|
const p: Precio = {
|
||||||
...ish,
|
...ish,
|
||||||
|
|
Loading…
Reference in a new issue