From 2fdd89b4f8a6bfa577d0fec62105b805df1fd3a8 Mon Sep 17 00:00:00 2001 From: Nulo Date: Sat, 23 Dec 2023 20:43:53 -0300 Subject: [PATCH] TODO --- scraper/scrap.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scraper/scrap.ts b/scraper/scrap.ts index 6a9a511..7d9e91d 100644 --- a/scraper/scrap.ts +++ b/scraper/scrap.ts @@ -49,6 +49,9 @@ async function parseWarc(path: string) { for await (const record of parser) { if (record.warcType === "response") { if (!record.warcTargetURI) continue; + + // TODO: saltear si ya existe el record-id con el mismo parser version + // y sobreescribir si existe el mismo record-id pero con version mas bajo? const html = await record.contentText(); const url = new URL(record.warcTargetURI);