reintentar scrap

This commit is contained in:
Cat /dev/Nulo 2024-01-04 16:31:00 -03:00
parent 7e58397c8c
commit 087be6714c

View file

@ -31,18 +31,37 @@ export async function downloadList(path: string) {
await pMap(
list,
// Per-URL worker: runs `scrap` on one URL string, retrying a bounded number of times.
async (urlS) => {
// Placeholder value; it is overwritten by the first attempt below.
let res: ScrapResult = { type: "skipped" };
// Up to three attempts. Only a "done" result stops early, so both
// "skipped" and "error" results lead to another attempt.
for (let attempts = 0; attempts < 3; attempts++) {
res = await scrap(urlS);
if (res.type === "done") {
break;
}
}
// Report the outcome only when the last attempt ended in an error.
if (res.type === "error") console.error(res);
},
{ concurrency: 32 }
);
return progress;
}
/** Outcome of a single `scrap` call, discriminated by the `type` tag. */
type ScrapResult =
// Returned when the URL string cannot be parsed or the HTTP response is not OK.
| { type: "skipped" }
// Returned after the scraped values have been inserted into the database.
| { type: "done" }
// Returned from the catch handler, carrying the URL string and the caught error.
| { type: "error"; url: string; error: any };
async function scrap(urlS: string): Promise<ScrapResult> {
let url;
try {
url = new URL(urlS);
} catch (err) {
console.error("error parseando", urlS);
return;
console.error(`skipped ${urlS} because ${err}`);
return { type: "skipped" };
}
const res = await fetch(url);
if (!res.ok) {
console.debug(`skipped ${urlS} because status=${res.status} (!=200)`);
progress.skipped++;
return;
return { type: "skipped" };
}
const html = await res.text();
@ -66,15 +85,8 @@ export async function downloadList(path: string) {
await db.insert(schema.precios).values(p);
progress.done++;
return { type: "done" };
} catch (error) {
console.error({ path, urlS, error });
progress.errors.push({
path,
url: urlS,
error,
});
if (DEBUG) {
const urlHash = createHash("md5").update(urlS).digest("hex");
const output = join("debug", `${urlHash}.html`);
@ -82,10 +94,10 @@ export async function downloadList(path: string) {
await writeFile(output, html);
console.error(`wrote html to ${output}`);
}
return {
type: "error",
url: urlS,
error,
};
}
},
{ concurrency: 32 }
);
return progress;
}