wip: dia-link-scraper

This commit is contained in:
Cat /dev/Nulo 2023-12-21 23:59:06 -03:00
parent cc79195e3d
commit 470629678e
2 changed files with 69 additions and 0 deletions

51
dia-link-scraper/index.ts Normal file
View file

@@ -0,0 +1,51 @@
import puppeteer from "puppeteer";
import { blockImages } from "../puppeteer-utils";
/**
 * Script entry point: opens the DIA online store "almacen" listing, keeps
 * clicking the "Ver más Productos" button so more products get appended to
 * the page, and finally prints every product link found, one per line, to
 * stdout.
 *
 * The loop stops once the number of product links has not changed for
 * `stallTimeoutMs`. The browser is always closed, even when navigation or
 * scraping fails.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { height: 4000, width: 1920 },
  });
  try {
    // Everything after launch lives inside the try so a failed navigation or
    // selector wait cannot leave the browser process running.
    const page = await browser.newPage();
    await blockImages(page);
    await page.goto("https://diaonline.supermercadosdia.com.ar/almacen/");

    /** Collects the absolute URL of every product link currently in the DOM. */
    const getHrefs = (): Promise<string[]> =>
      page.evaluate(() =>
        Array.from(
          document.querySelectorAll<HTMLAnchorElement>(
            "a.vtex-product-summary-2-x-clearLink"
          ),
          (a) => new URL(a.href).toString()
        )
      );

    const seeMoreSel = "button ::-p-text(Ver más Productos)";
    // How long the link count may stay unchanged before we consider the
    // listing fully loaded.
    const stallTimeoutMs = 15000;
    // Pause between two consecutive click attempts.
    const pollIntervalMs = 150;

    await page.waitForSelector(seeMoreSel);

    // `n` is the last observed link count, `d` the time it last CHANGED.
    let prev = { n: 0, d: Date.now() };
    while (true) {
      const hrefs = await getHrefs();
      console.debug(prev);
      if (hrefs.length !== prev.n) {
        // Progress: remember the new count and restart the stall timer.
        prev = { n: hrefs.length, d: Date.now() };
      } else if (Date.now() > prev.d + stallTimeoutMs) {
        // No new links for the whole timeout window: we are done.
        break;
      }

      // `$$eval` (unlike `$eval`) does not throw when nothing matches, so a
      // button that is missing (e.g. removed after the last page) simply
      // results in no click and the stall timeout ends the loop.
      await page.$$eval(seeMoreSel, (els) => {
        const el = els[0];
        if (!el) return;
        // The selector matches the text node's element inside the button;
        // click its parent, as the original code did.
        el.parentElement?.click();
        el.scrollIntoView({ block: "center", behavior: "smooth" });
      });
      await wait(pollIntervalMs);
    }

    const hrefs = await getHrefs();
    hrefs.forEach((l) => console.log(l));
  } finally {
    await browser.close();
  }
})();
/**
 * Returns a promise that resolves with `undefined` when a `setTimeout`
 * scheduled for `ms` milliseconds fires.
 *
 * @param ms - Delay passed to `setTimeout`, in milliseconds.
 */
function wait(ms: number) {
  return new Promise((done) => {
    const finish = () => done(undefined);
    setTimeout(finish, ms);
  });
}

View file

@@ -0,0 +1,18 @@
{
"name": "dia-link-scraper",
"type": "module",
"version": "1.0.0",
"description": "Scrapes product links from the DIA online store listing pages",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"puppeteer": "^21.6.1",
"puppeteer-utils": "workspace:^",
"tsx": "^4.7.0"
}
}