reintentar scrap

This commit is contained in:
Cat /dev/Nulo 2024-01-04 16:31:00 -03:00
parent 7e58397c8c
commit 087be6714c

View file

@ -31,18 +31,37 @@ export async function downloadList(path: string) {
await pMap( await pMap(
list, list,
async (urlS) => { async (urlS) => {
let res: ScrapResult = { type: "skipped" };
for (let attempts = 0; attempts < 3; attempts++) {
res = await scrap(urlS);
if (res.type === "done") {
break;
}
}
if (res.type === "error") console.error(res);
},
{ concurrency: 32 }
);
return progress;
}
/**
 * Outcome of a single `scrap` attempt for one URL.
 *
 * - `skipped`: the URL was not processed; `scrap` returns this when the URL
 *   string cannot be parsed or when the HTTP response is not OK.
 * - `done`: the scraped row was inserted into the database.
 * - `error`: an exception was caught while processing the page; `url` is the
 *   original URL string and `error` is the caught value.
 *
 * `error` is typed `unknown` rather than `any` because a caught value can be
 * anything; consumers must narrow it (e.g. `instanceof Error`) before reading
 * properties from it.
 */
type ScrapResult =
  | { type: "skipped" }
  | { type: "done" }
  | { type: "error"; url: string; error: unknown };
async function scrap(urlS: string): Promise<ScrapResult> {
let url; let url;
try { try {
url = new URL(urlS); url = new URL(urlS);
} catch (err) { } catch (err) {
console.error("error parseando", urlS); console.error(`skipped ${urlS} because ${err}`);
return; return { type: "skipped" };
} }
const res = await fetch(url); const res = await fetch(url);
if (!res.ok) { if (!res.ok) {
console.debug(`skipped ${urlS} because status=${res.status} (!=200)`); console.debug(`skipped ${urlS} because status=${res.status} (!=200)`);
progress.skipped++; return { type: "skipped" };
return;
} }
const html = await res.text(); const html = await res.text();
@ -66,15 +85,8 @@ export async function downloadList(path: string) {
await db.insert(schema.precios).values(p); await db.insert(schema.precios).values(p);
progress.done++; return { type: "done" };
} catch (error) { } catch (error) {
console.error({ path, urlS, error });
progress.errors.push({
path,
url: urlS,
error,
});
if (DEBUG) { if (DEBUG) {
const urlHash = createHash("md5").update(urlS).digest("hex"); const urlHash = createHash("md5").update(urlS).digest("hex");
const output = join("debug", `${urlHash}.html`); const output = join("debug", `${urlHash}.html`);
@ -82,10 +94,10 @@ export async function downloadList(path: string) {
await writeFile(output, html); await writeFile(output, html);
console.error(`wrote html to ${output}`); console.error(`wrote html to ${output}`);
} }
return {
type: "error",
url: urlS,
error,
};
} }
},
{ concurrency: 32 }
);
return progress;
} }