From 470629678e4184b04f546077bba48957fe9a6f83 Mon Sep 17 00:00:00 2001
From: Nulo
Date: Thu, 21 Dec 2023 23:59:06 -0300
Subject: [PATCH] wip: dia-link-scraper

---
 dia-link-scraper/index.ts     | 79 +++++++++++++++++++++++++++++++++++++++++++
 dia-link-scraper/package.json | 18 ++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 dia-link-scraper/index.ts
 create mode 100644 dia-link-scraper/package.json

diff --git a/dia-link-scraper/index.ts b/dia-link-scraper/index.ts
new file mode 100644
index 0000000..7eb211f
--- /dev/null
+++ b/dia-link-scraper/index.ts
@@ -0,0 +1,79 @@
+import puppeteer from "puppeteer";
+import { blockImages } from "../puppeteer-utils";
+
+// How long the link count may stay unchanged before the scraper gives up.
+const STALL_TIMEOUT_MS = 15000;
+// Pause between two clicks on the "see more" button.
+const CLICK_INTERVAL_MS = 150;
+
+// Loads the "almacen" listing, keeps clicking "Ver más Productos" to load
+// more products and finally prints every product link, one per line.
+(async () => {
+  const browser = await puppeteer.launch({
+    headless: false,
+    defaultViewport: { height: 4000, width: 1920 },
+  });
+
+  // Everything after the launch sits inside try/finally so the browser is
+  // closed even when navigation or the selector wait fails.
+  try {
+    const page = await browser.newPage();
+
+    await blockImages(page);
+
+    await page.goto("https://diaonline.supermercadosdia.com.ar/almacen/");
+
+    // Collects the normalized href of every product link currently in the DOM.
+    async function getHrefs(): Promise<string[]> {
+      return await page.evaluate(() =>
+        Array.from(
+          // The type argument is required: a class selector is otherwise
+          // typed as plain Element, which has no `href`.
+          document.querySelectorAll<HTMLAnchorElement>(
+            "a.vtex-product-summary-2-x-clearLink"
+          ),
+          (a) => new URL(a.href).toString()
+        )
+      );
+    }
+
+    const seeMoreSel = "button ::-p-text(Ver más Productos)";
+    await page.waitForSelector(seeMoreSel);
+
+    // Link count last seen (n) and the time it last changed (d).
+    let prev = { n: 0, d: Date.now() };
+    while (true) {
+      const hrefs = await getHrefs();
+      // Progress goes to stderr so that stdout carries only the links.
+      console.error(prev);
+      if (hrefs.length !== prev.n) {
+        // Restart the stall clock only when the count changed; resetting it
+        // on every pass would keep the timeout from ever firing.
+        prev = { n: hrefs.length, d: Date.now() };
+      } else if (Date.now() > prev.d + STALL_TIMEOUT_MS) {
+        break;
+      }
+
+      // page.$ yields null instead of throwing once the button is gone, so
+      // the links gathered so far still get printed.
+      const seeMoreEl = await page.$(seeMoreSel);
+      if (seeMoreEl === null) break;
+      await seeMoreEl.evaluate((el) => {
+        el.parentElement?.click();
+        el.scrollIntoView({ block: "center", behavior: "smooth" });
+      });
+      await seeMoreEl.dispose();
+      await wait(CLICK_INTERVAL_MS);
+    }
+
+    const hrefs = await getHrefs();
+    hrefs.forEach((l) => console.log(l));
+  } finally {
+    await browser.close();
+  }
+})();
+
+// Resolves after `ms` milliseconds.
+function wait(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
diff --git a/dia-link-scraper/package.json b/dia-link-scraper/package.json
new file mode 100644
index 0000000..4f9c7d6
--- /dev/null
+++ b/dia-link-scraper/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "dia-link-scraper",
+  "type": "module",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "puppeteer": "^21.6.1",
+    "puppeteer-utils": "workspace:^",
+    "tsx": "^4.7.0"
+  }
+}