mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 11:36:20 +00:00
reintentar scrap
This commit is contained in:
parent
7e58397c8c
commit
087be6714c
1 changed file with 62 additions and 50 deletions
|
@@ -31,18 +31,37 @@ export async function downloadList(path: string) {
|
||||||
await pMap(
|
await pMap(
|
||||||
list,
|
list,
|
||||||
async (urlS) => {
|
async (urlS) => {
|
||||||
|
let res: ScrapResult = { type: "skipped" };
|
||||||
|
for (let attempts = 0; attempts < 3; attempts++) {
|
||||||
|
res = await scrap(urlS);
|
||||||
|
if (res.type === "done") {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (res.type === "error") console.error(res);
|
||||||
|
},
|
||||||
|
{ concurrency: 32 }
|
||||||
|
);
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
|
type ScrapResult =
|
||||||
|
| { type: "skipped" }
|
||||||
|
| { type: "done" }
|
||||||
|
| { type: "error"; url: string; error: any };
|
||||||
|
async function scrap(urlS: string): Promise<ScrapResult> {
|
||||||
let url;
|
let url;
|
||||||
try {
|
try {
|
||||||
url = new URL(urlS);
|
url = new URL(urlS);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("error parseando", urlS);
|
console.error(`skipped ${urlS} because ${err}`);
|
||||||
return;
|
return { type: "skipped" };
|
||||||
}
|
}
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
console.debug(`skipped ${urlS} because status=${res.status} (!=200)`);
|
console.debug(`skipped ${urlS} because status=${res.status} (!=200)`);
|
||||||
progress.skipped++;
|
return { type: "skipped" };
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const html = await res.text();
|
const html = await res.text();
|
||||||
|
@@ -66,15 +85,8 @@ export async function downloadList(path: string) {
|
||||||
|
|
||||||
await db.insert(schema.precios).values(p);
|
await db.insert(schema.precios).values(p);
|
||||||
|
|
||||||
progress.done++;
|
return { type: "done" };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error({ path, urlS, error });
|
|
||||||
progress.errors.push({
|
|
||||||
path,
|
|
||||||
url: urlS,
|
|
||||||
error,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
const urlHash = createHash("md5").update(urlS).digest("hex");
|
const urlHash = createHash("md5").update(urlS).digest("hex");
|
||||||
const output = join("debug", `${urlHash}.html`);
|
const output = join("debug", `${urlHash}.html`);
|
||||||
|
@@ -82,10 +94,10 @@ export async function downloadList(path: string) {
|
||||||
await writeFile(output, html);
|
await writeFile(output, html);
|
||||||
console.error(`wrote html to ${output}`);
|
console.error(`wrote html to ${output}`);
|
||||||
}
|
}
|
||||||
|
return {
|
||||||
|
type: "error",
|
||||||
|
url: urlS,
|
||||||
|
error,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
},
|
|
||||||
{ concurrency: 32 }
|
|
||||||
);
|
|
||||||
|
|
||||||
return progress;
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue