mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 03:26:19 +00:00
reintentar scrap
This commit is contained in:
parent
7e58397c8c
commit
087be6714c
1 changed file with 62 additions and 50 deletions
|
@ -31,18 +31,37 @@ export async function downloadList(path: string) {
|
|||
await pMap(
|
||||
list,
|
||||
async (urlS) => {
|
||||
let res: ScrapResult = { type: "skipped" };
|
||||
for (let attempts = 0; attempts < 3; attempts++) {
|
||||
res = await scrap(urlS);
|
||||
if (res.type === "done") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (res.type === "error") console.error(res);
|
||||
},
|
||||
{ concurrency: 32 }
|
||||
);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
 * Outcome of a single `scrap(urlS)` call, discriminated by `type`.
 *
 * - `"skipped"`: returned when the URL string cannot be parsed or the HTTP
 *   response is not OK.
 * - `"done"`: returned after the scraped price row has been inserted into the
 *   database.
 * - `"error"`: returned from the catch block, carrying the URL string that was
 *   being scraped and the caught error value.
 *
 * The retry loop in `downloadList` stops retrying as soon as it sees `"done"`
 * and logs the result when the final outcome is `"error"`.
 */
type ScrapResult =
|
||||
| { type: "skipped" }
|
||||
| { type: "done" }
|
||||
| { type: "error"; url: string; error: any };
|
||||
async function scrap(urlS: string): Promise<ScrapResult> {
|
||||
let url;
|
||||
try {
|
||||
url = new URL(urlS);
|
||||
} catch (err) {
|
||||
console.error("error parseando", urlS);
|
||||
return;
|
||||
console.error(`skipped ${urlS} because ${err}`);
|
||||
return { type: "skipped" };
|
||||
}
|
||||
const res = await fetch(url);
|
||||
if (!res.ok) {
|
||||
console.debug(`skipped ${urlS} because status=${res.status} (!=200)`);
|
||||
progress.skipped++;
|
||||
return;
|
||||
return { type: "skipped" };
|
||||
}
|
||||
|
||||
const html = await res.text();
|
||||
|
@ -66,15 +85,8 @@ export async function downloadList(path: string) {
|
|||
|
||||
await db.insert(schema.precios).values(p);
|
||||
|
||||
progress.done++;
|
||||
return { type: "done" };
|
||||
} catch (error) {
|
||||
console.error({ path, urlS, error });
|
||||
progress.errors.push({
|
||||
path,
|
||||
url: urlS,
|
||||
error,
|
||||
});
|
||||
|
||||
if (DEBUG) {
|
||||
const urlHash = createHash("md5").update(urlS).digest("hex");
|
||||
const output = join("debug", `${urlHash}.html`);
|
||||
|
@ -82,10 +94,10 @@ export async function downloadList(path: string) {
|
|||
await writeFile(output, html);
|
||||
console.error(`wrote html to ${output}`);
|
||||
}
|
||||
return {
|
||||
type: "error",
|
||||
url: urlS,
|
||||
error,
|
||||
};
|
||||
}
|
||||
},
|
||||
{ concurrency: 32 }
|
||||
);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue