diff --git a/bun.lockb b/bun.lockb index 03e0aff..c692e1b 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/carrefour-link-scraper/index.ts b/carrefour-link-scraper/index.ts index 82c0d15..6779c81 100644 --- a/carrefour-link-scraper/index.ts +++ b/carrefour-link-scraper/index.ts @@ -1,4 +1,5 @@ import pMap from "p-map"; +import { decodeXML } from "entities"; import { saveUrls } from "db-datos/urlHelpers.js"; export async function scrapCarrefourProducts() { @@ -31,7 +32,7 @@ async function scrapBySitemap() { text(element) { const txt = element.text.trim(); if (!txt) return; - urls.add(txt); + urls.add(decodeXML(txt)); }, }) .transform(new Response(xml)); diff --git a/dia-link-scraper/index.ts b/dia-link-scraper/index.ts index 67709b0..09b825c 100644 --- a/dia-link-scraper/index.ts +++ b/dia-link-scraper/index.ts @@ -1,4 +1,5 @@ import pMap from "p-map"; +import { decodeXML } from "entities"; import { parseHTML } from "linkedom"; import { getHtml } from "../scraper/fetch.js"; import { saveUrls } from "db-datos/urlHelpers.js"; @@ -90,7 +91,7 @@ async function scrapBySitemap() { text(element) { const txt = element.text.trim(); if (!txt) return; - urls.add(txt); + urls.add(decodeXML(txt)); }, }) .transform(new Response(xml)); diff --git a/scraper/package.json b/scraper/package.json index edaf0ca..a3351af 100644 --- a/scraper/package.json +++ b/scraper/package.json @@ -17,6 +17,7 @@ "date-fns": "^3.0.6", "db-datos": "workspace:^", "drizzle-orm": "=0.29.1", + "entities": "^4.5.0", "linkedom": "^0.16.5", "nanoid": "^5.0.4", "p-map": "^7.0.1",