mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-23 06:36:19 +00:00
wip: dia-link-scraper
This commit is contained in:
parent
cc79195e3d
commit
470629678e
2 changed files with 69 additions and 0 deletions
51
dia-link-scraper/index.ts
Normal file
51
dia-link-scraper/index.ts
Normal file
|
@ -0,0 +1,51 @@
|
|||
import puppeteer from "puppeteer";
|
||||
import { blockImages } from "../puppeteer-utils";
|
||||
|
||||
/**
 * Script entry point: opens the Dia "almacen" listing in a (non-headless)
 * Chromium instance, keeps pressing the "Ver más Productos" button until the
 * number of product links stops growing, then prints every product URL to
 * stdout, one per line.
 *
 * Progress information is written to stderr so that stdout contains only the
 * URLs and can be piped straight into a file or another tool.
 *
 * A rejection anywhere in here is left unhandled on purpose: Node prints the
 * stack trace and exits with a non-zero status, which is what we want from a
 * one-shot script. The browser is still closed by the `finally` below.
 */
(async () => {
  // How long the link count may stay unchanged before the listing is
  // considered fully loaded.
  const stallTimeoutMs = 15000;
  // Pause between two consecutive presses of the "see more" button.
  const clickIntervalMs = 150;
  // Puppeteer text selector: an element inside a <button> whose text contains
  // "Ver más Productos".
  const seeMoreSel = "button ::-p-text(Ver más Productos)";

  const browser = await puppeteer.launch({
    headless: false,
    // Very tall viewport so many product cards are on screen at once.
    defaultViewport: { height: 4000, width: 1920 },
  });

  // Everything after launch lives inside try/finally so the browser process
  // is closed even when navigation or the initial selector wait fails.
  try {
    const page = await browser.newPage();

    // NOTE(review): project helper from ../puppeteer-utils; judging by its
    // name it prevents image requests from loading — confirm there.
    await blockImages(page);

    await page.goto("https://diaonline.supermercadosdia.com.ar/almacen/");

    /** Collects the absolute URL of every product link currently in the DOM. */
    const getHrefs = (): Promise<string[]> =>
      page.evaluate(() =>
        Array.from(
          document.querySelectorAll<HTMLAnchorElement>(
            "a.vtex-product-summary-2-x-clearLink"
          ),
          (a) => new URL(a.href).toString()
        )
      );

    await page.waitForSelector(seeMoreSel);

    // `n` is the last observed link count, `d` the time it last changed.
    let last = { n: 0, d: Date.now() };
    while (true) {
      const count = (await getHrefs()).length;
      console.error(last);

      if (count !== last.n) {
        // Progress was made: remember the new count and restart the timer.
        last = { n: count, d: Date.now() };
      } else if (Date.now() > last.d + stallTimeoutMs) {
        // No new links for the whole timeout window: assume we are done.
        break;
      }

      // $$eval (unlike $eval) does not reject when nothing matches, so a
      // button that is temporarily or permanently missing does not abort the
      // run; the stall timer above decides when to stop instead.
      await page.$$eval(seeMoreSel, (matches) => {
        const el = matches[0];
        if (!el) return;
        // The selector matches the text node's element; its parent is
        // expected to be the clickable button.
        el.parentElement?.click();
        el.scrollIntoView({ block: "center", behavior: "smooth" });
      });
      await wait(clickIntervalMs);
    }

    // Query once more so links that arrived after the last count are included.
    for (const href of await getHrefs()) {
      console.log(href);
    }
  } finally {
    await browser.close();
  }
})();
|
||||
|
||||
/**
 * Sleeps asynchronously.
 *
 * @param ms - Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves with `undefined` once the timer fires.
 */
function wait(ms: number) {
  return new Promise((done) => {
    setTimeout(() => {
      done(undefined);
    }, ms);
  });
}
|
18
dia-link-scraper/package.json
Normal file
18
dia-link-scraper/package.json
Normal file
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"name": "dia-link-scraper",
|
||||
"type": "module",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"puppeteer": "^21.6.1",
|
||||
"puppeteer-utils": "workspace:^",
|
||||
"tsx": "^4.7.0"
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue