diff --git a/index.js b/index.js
index 24d87cd..4fdb62a 100644
--- a/index.js
+++ b/index.js
@@ -6,38 +6,56 @@ import { join } from "path";
 const { argv } = process;
 const dirPath = argv[2] || ".";
 
-function recurseElement(report, el) {
+function recurseElement(report, rawHtml, el) {
   if (el.name === "a") {
     if (el.attribs.href !== undefined) {
       if (isHttp(el.attribs.href)) {
-        report.infos.push(`HTTP/S link: ${getHtml(el)}`);
+        report.things.push({
+          type: "link-http",
+          description: getText(el),
+        });
       } else if (isAbsolute(el.attribs.href)) {
-        report.warnings.push(`Absolute link: ${getHtml(el)}`);
+        report.things.push({
+          type: "link-absolute",
+          description: getText(el),
+        });
       }
     } else {
-      report.warnings.push(`Link with no href: ${getHtml(el)}`);
+      report.things.push({
+        type: "link-no-href",
+        description: getText(el),
+      });
     }
   }
 
   if (["audio", "video", "img"].includes(el.name)) {
     if (el.attribs.src) {
-      checkUrl(report, el.name, el, el.attribs.src);
+      checkUrl(report, rawHtml, "media", el, el.attribs.src);
     } else {
-      report.warnings.push(`${el.name} with no src: ${getHtml(el)}`);
+      report.things.push({
+        type: "media-no-src",
+        description: getHtml(rawHtml, el),
+      });
     }
   }
   for (const child of el.children) {
     if (child.type === "tag") {
-      recurseElement(report, child);
+      recurseElement(report, rawHtml, child);
     }
   }
 }
 
-function checkUrl(report, type, el, url) {
+function checkUrl(report, rawHtml, type, el, url) {
   if (isHttp(url)) {
-    report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
+    report.things.push({
+      type: type + "-http",
+      description: getHtml(rawHtml, el),
+    });
   } else if (isAbsolute(url)) {
-    report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
+    report.things.push({
+      type: type + "-absolute",
+      description: getHtml(rawHtml, el),
+    });
   }
 }
 
@@ -49,12 +67,14 @@ function isAbsolute(url) {
   return url.startsWith("/");
 }
 
-function getHtml(el) {
-  // return rawHtml.slice(el.startIndex, el.endIndex);
+function getHtml(rawHtml, el) {
+  return rawHtml.slice(el.startIndex, el.endIndex);
+}
+function getText(el) {
   let text = "";
   for (const child of el.children) {
     if (child.type === "text") text += child.data.trim();
-    else if (child.type === "tag") text += getHtml(child);
+    else if (child.type === "tag") text += getText(child);
   }
   return text;
 }
@@ -67,12 +87,13 @@ function processFile(content) {
         reject(error);
       } else {
         let report = {
+          things: [],
           warnings: [],
           infos: [],
         };
         for (const el of dom) {
           if (el.type === "tag") {
-            recurseElement(report, el);
+            recurseElement(report, content, el);
           }
         }
         resolve(report);
@@ -85,7 +106,7 @@ function processFile(content) {
   });
 }
 
-let reports = [];
+let reports = {};
 async function recurseDirectory(reports, path) {
   const dir = await readdir(path, { withFileTypes: true });
   for (const file of dir) {
@@ -95,14 +116,38 @@ async function recurseDirectory(reports, path) {
       if (!file.name.endsWith(".html")) continue;
       const content = await readFile(filePath, "utf-8");
       console.time(filePath);
-      reports.push(await processFile(content));
+      // Key by file path (not the directory path) so that several HTML files
+      // in one directory do not overwrite each other's report.
+      reports[filePath] = await processFile(content);
       console.timeEnd(filePath);
     }
   }
 }
 await recurseDirectory(reports, dirPath);
-const totalWarnings = reports.map((r) => r.warnings).flat();
+const totalThings = Object.values(reports)
+  .map((r) => r.things)
+  .flat();
+const kinds = new Set(totalThings.map((t) => t.type));
+// `reports` is a plain object, which has no `.length`; count its keys instead.
 console.log(
-  `Finished with ${reports.length} files read, ${totalWarnings.length} warnings`,
-  totalWarnings
+  `Finished with ${Object.keys(reports).length} files read, ${totalThings.length} things`
+);
+for (const kind of kinds) {
+  const count = totalThings.filter((t) => t.type === kind).length;
+  console.log(`==> ${kind}: ${count}`);
+}
+console.log("This means:");
+const pathBasedCount = totalThings.filter((t) =>
+  ["media-absolute", "media-http"].includes(t.type)
+).length;
+console.log(
+  `==> ${pathBasedCount} problems that affect users using legacy IPFS gateways`
+);
+const mediaHttp = totalThings.filter((t) => t.type === "media-http").length;
+console.log(
+  `==> ${mediaHttp} problems that make the website not self-contained, making it miss content if HTTP is unavailable`
+);
+const linkHttp = totalThings.filter((t) => t.type === "link-http").length;
+console.log(
+  `==> ${linkHttp} links to HTTP sites, which is not a real concern unless it's a key part of the site's navigation.`
 );