mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-23 14:46:20 +00:00
wip: dia-link-scraper
This commit is contained in:
parent
cc79195e3d
commit
470629678e
2 changed files with 69 additions and 0 deletions
51
dia-link-scraper/index.ts
Normal file
51
dia-link-scraper/index.ts
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
import puppeteer from "puppeteer";
|
||||||
|
import { blockImages } from "../puppeteer-utils";
|
||||||
|
|
||||||
|
/**
 * Script entry point.
 *
 * Opens the Dia online "almacen" listing, keeps pressing the
 * "Ver más Productos" control until the number of product anchors on the page
 * stops growing, then prints every collected product URL (one per line) with
 * `console.log`.
 *
 * The browser is always closed, including when navigation or scraping fails.
 */
(async () => {
  /** How long the product count must stay unchanged before we stop. */
  const stallTimeoutMs = 15000;
  /** Pause between two consecutive "see more" attempts. */
  const pollIntervalMs = 150;

  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { height: 4000, width: 1920 },
  });

  // Everything after launch lives inside `try` so that a failure in
  // newPage/goto/waitForSelector cannot leak the browser process.
  try {
    const page = await browser.newPage();

    // NOTE(review): presumably stops the page from downloading images; the
    // helper lives in ../puppeteer-utils and is not visible here — confirm.
    await blockImages(page);

    await page.goto("https://diaonline.supermercadosdia.com.ar/almacen/");

    /** Returns the normalized `href` of every product anchor currently in the DOM. */
    const getHrefs = () =>
      page.evaluate(() =>
        Array.from(
          document.querySelectorAll<HTMLAnchorElement>(
            "a.vtex-product-summary-2-x-clearLink"
          ),
          (a) => new URL(a.href).toString()
        )
      );

    const seeMoreSel = "button ::-p-text(Ver más Productos)";
    await page.waitForSelector(seeMoreSel);

    // `n` is the last observed product count, `d` is when that count was
    // first seen. `d` must only move when `n` changes; refreshing it on every
    // iteration would make the stall timeout unreachable.
    let prev = { n: 0, d: Date.now() };
    while (true) {
      const count = (await getHrefs()).length;
      console.debug(prev);

      if (count !== prev.n) {
        prev = { n: count, d: Date.now() };
      } else if (Date.now() > prev.d + stallTimeoutMs) {
        break;
      }

      // `page.$` yields null instead of throwing when the control is absent
      // (for example after the last page has loaded), so the loop can keep
      // polling and end through the stall timeout with the links intact.
      const seeMoreEl = await page.$(seeMoreSel);
      if (seeMoreEl !== null) {
        await seeMoreEl.evaluate((el) => {
          // The selector matches the node holding the text; the click is
          // dispatched on its parent element.
          el.parentElement?.click();
          el.scrollIntoView({ block: "center", behavior: "smooth" });
        });
        await seeMoreEl.dispose();
      }

      await wait(pollIntervalMs);
    }

    const hrefs = await getHrefs();
    hrefs.forEach((l) => console.log(l));
  } finally {
    await browser.close();
  }
})().catch((error: unknown) => {
  // Surface the failure and signal it through the exit status instead of
  // leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
|
||||||
|
|
||||||
|
/**
 * Pauses for roughly `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves with no value once the timer fires.
 */
function wait(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(() => resolve(), ms);
  });
}
|
18
dia-link-scraper/package.json
Normal file
18
dia-link-scraper/package.json
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"name": "dia-link-scraper",
|
||||||
|
"type": "module",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"puppeteer": "^21.6.1",
|
||||||
|
"puppeteer-utils": "workspace:^",
|
||||||
|
"tsx": "^4.7.0"
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue