mirror of
https://github.com/catdevnull/preciazo.git
synced 2025-02-23 16:26:24 +00:00
scraper links coto
This commit is contained in:
parent
3f7520393a
commit
32d64b83d4
5 changed files with 1137 additions and 0 deletions
29
.vscode/launch.json
vendored
Normal file
29
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "Launch Program",
|
||||
"skipFiles": ["<node_internals>/**"],
|
||||
"cwd": "${workspaceFolder}/scraper",
|
||||
"runtimeArgs": ["--import", "tsx/esm"],
|
||||
"program": "${workspaceFolder}/scraper/scrap.ts",
|
||||
"args": ["carrefour.warc.gz"],
|
||||
"outFiles": ["${workspaceFolder}/**/*.js"]
|
||||
},
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "coto-link-scraper",
|
||||
"skipFiles": ["<node_internals>/**"],
|
||||
"cwd": "${workspaceFolder}/coto-link-scraper",
|
||||
"runtimeArgs": ["--import", "tsx/esm"],
|
||||
"program": "${workspaceFolder}/coto-link-scraper/index.ts",
|
||||
"outFiles": ["${workspaceFolder}/**/*.js"]
|
||||
},
|
||||
]
|
||||
}
|
38
coto-link-scraper/index.ts
Normal file
38
coto-link-scraper/index.ts
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Import puppeteer
|
||||
import puppeteer from "puppeteer";
|
||||
|
||||
/**
 * Crawls the Coto Digital "almacén" catalogue and prints every product link
 * it finds to stdout, one URL per line.
 *
 * After printing the links of the current page it clicks the "Siguiente"
 * (next) pagination link. The crawl ends normally when that link does not
 * appear within the timeout, which is taken to mean the last page was reached.
 *
 * The browser is always closed, including when opening the page or the first
 * navigation fails.
 */
async function main(): Promise<void> {
  const productLinkSelector = ".product_info_container a";
  const nextPageSelector = 'a[title="Siguiente"]';
  const nextPageTimeoutMs = 5000;

  const browser = await puppeteer.launch();

  try {
    // Opened inside the try so a failure here still reaches browser.close().
    const page = await browser.newPage();
    await page.goto(
      "https://www.cotodigital3.com.ar/sitios/cdigi/browse/catalogo-almac%C3%A9n/"
    );

    /** Waits for product links to be present, then returns their absolute URLs. */
    const getHrefs = async (): Promise<string[]> => {
      const firstLink = await page.waitForSelector(productLinkSelector);
      await firstLink?.dispose();

      return page.$$eval(productLinkSelector, (anchors) =>
        anchors.map((a) => new URL((a as HTMLAnchorElement).href).toString())
      );
    };

    /**
     * Returns a handle to the "next page" link, or null when it does not show
     * up within the timeout. Any other failure is rethrown.
     */
    const findNextButton = async () => {
      try {
        return await page.waitForSelector(nextPageSelector, {
          timeout: nextPageTimeoutMs,
        });
      } catch (error: unknown) {
        // Puppeteer's TimeoutError sets `name` to its class name; checking the
        // name avoids needing an extra import just for `instanceof`.
        if (error instanceof Error && error.name === "TimeoutError") {
          return null;
        }
        throw error;
      }
    };

    while (true) {
      const hrefs = await getHrefs();
      for (const href of hrefs) {
        console.log(href);
      }

      const nextButton = await findNextButton();
      if (nextButton === null) {
        // No "Siguiente" link: treat as the last page and finish cleanly.
        break;
      }

      // NOTE(review): nothing waits for the click to load the next page before
      // the next getHrefs() call; if the old DOM is still present, the same
      // links may be printed twice. Confirm how the site paginates before
      // adding an explicit navigation wait.
      await nextButton.click();
      await nextButton.dispose();
    }
  } finally {
    await browser.close();
  }
}

main().catch((error: unknown) => {
  // Report real failures and signal them through the exit code.
  console.error(error);
  process.exitCode = 1;
});
|
17
coto-link-scraper/package.json
Normal file
17
coto-link-scraper/package.json
Normal file
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "coto-link-scraper",
|
||||
"type": "module",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"puppeteer": "^21.6.1",
|
||||
"tsx": "^4.7.0"
|
||||
}
|
||||
}
|
1053
coto-link-scraper/pnpm-lock.yaml
Normal file
1053
coto-link-scraper/pnpm-lock.yaml
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue