Make reports more useful

This commit is contained in:
Cat /dev/Nulo 2022-11-25 19:53:24 -03:00
parent 93eaa423e1
commit dfe673bff1

View file

@ -6,38 +6,56 @@ import { join } from "path";
// Command-line arguments of the running Node.js process.
const { argv } = process;
// Directory to scan: first CLI argument, falling back to the current directory.
// `||` (not `??`) is deliberate so that an empty-string argument also falls back.
const dirPath = argv[2] || ".";
/**
 * Walks an element and all of its descendant tags, appending one entry to
 * `report.things` for every link or media element worth reporting.
 *
 * Entries have the shape `{ type, description }`:
 * - `<a>` with an HTTP/S href        -> "link-http"
 * - `<a>` with an absolute href      -> "link-absolute"
 * - `<a>` without an href attribute  -> "link-no-href"
 * - `<audio>`/`<video>`/`<img>` with a src are delegated to `checkUrl`
 *   using the "media" type prefix
 * - `<audio>`/`<video>`/`<img>` without a (non-empty) src -> "media-no-src"
 *
 * Links are described by their text content; media elements by their markup.
 *
 * @param {{ things: Array<{ type: string, description: string }> }} report
 *   Report being built; mutated in place.
 * @param {string} rawHtml - Source text of the document, used to quote markup.
 * @param {object} el - Parsed element exposing `name`, `attribs` and `children`.
 * @returns {void}
 */
function recurseElement(report, rawHtml, el) {
  const addThing = (type, description) => {
    report.things.push({ type, description });
  };

  if (el.name === "a") {
    const { href } = el.attribs;
    if (href === undefined) {
      addThing("link-no-href", getText(el));
    } else if (isHttp(href)) {
      addThing("link-http", getText(el));
    } else if (isAbsolute(href)) {
      addThing("link-absolute", getText(el));
    }
  }

  if (["audio", "video", "img"].includes(el.name)) {
    const { src } = el.attribs;
    if (src) {
      checkUrl(report, rawHtml, "media", el, src);
    } else {
      addThing("media-no-src", getHtml(rawHtml, el));
    }
  }

  // Only element nodes are descended into; other child node types are skipped.
  for (const child of el.children) {
    if (child.type === "tag") {
      recurseElement(report, rawHtml, child);
    }
  }
}
/**
 * Records a finding on `report.things` when `url` is an HTTP/S or an
 * absolute URL; any other URL is ignored.
 *
 * @param {{ things: Array<{ type: string, description: string }> }} report
 *   Report being built; mutated in place.
 * @param {string} rawHtml - Source text of the document, used to quote `el`.
 * @param {string} type - Prefix of the recorded type; "-http" or "-absolute"
 *   is appended to it (e.g. "media" -> "media-http").
 * @param {object} el - Element the URL was read from.
 * @param {string} url - URL to classify.
 * @returns {void}
 */
function checkUrl(report, rawHtml, type, el, url) {
  // The HTTP check comes first, so a URL matching both tests is reported as "-http".
  if (isHttp(url)) {
    report.things.push({
      type: `${type}-http`,
      description: getHtml(rawHtml, el),
    });
  } else if (isAbsolute(url)) {
    report.things.push({
      type: `${type}-absolute`,
      description: getHtml(rawHtml, el),
    });
  }
}
@ -49,12 +67,14 @@ function isAbsolute(url) {
return url.startsWith("/"); return url.startsWith("/");
} }
/**
 * Returns the original markup of a parsed node, cut out of the document text.
 *
 * @param {string} rawHtml - Full source text the node was parsed from.
 * @param {{ startIndex: ?number, endIndex: ?number }} el - Node carrying its
 *   source offsets.
 * @returns {string} The node's source text, or "" when offsets are missing.
 */
function getHtml(rawHtml, el) {
  const { startIndex, endIndex } = el;
  if (startIndex == null || endIndex == null) return "";
  // NOTE(review): htmlparser2/domhandler report `endIndex` as the offset of
  // the node's LAST character (inclusive) — confirm the parser in use.
  // `slice` takes an exclusive end, so without the +1 the final character
  // (typically the closing ">") would be cut off.
  return rawHtml.slice(startIndex, endIndex + 1);
}

/**
 * Collects the text content of an element and all of its descendant tags.
 * Every text node is trimmed and the pieces are concatenated with no
 * separator; child nodes that are neither text nor tags are ignored.
 *
 * @param {{ children: Array<object> }} el - Element whose text is wanted.
 * @returns {string} Concatenated, trimmed text content.
 */
function getText(el) {
  let text = "";
  for (const child of el.children) {
    if (child.type === "text") {
      text += child.data.trim();
    } else if (child.type === "tag") {
      text += getText(child);
    }
  }
  return text;
}
@ -67,12 +87,13 @@ function processFile(content) {
reject(error); reject(error);
} else { } else {
let report = { let report = {
things: [],
warnings: [], warnings: [],
infos: [], infos: [],
}; };
for (const el of dom) { for (const el of dom) {
if (el.type === "tag") { if (el.type === "tag") {
recurseElement(report, el); recurseElement(report, content, el);
} }
} }
resolve(report); resolve(report);
@ -85,7 +106,7 @@ function processFile(content) {
}); });
} }
// Collected per-file reports, stored as an object keyed by path.
let reports = {};
async function recurseDirectory(reports, path) { async function recurseDirectory(reports, path) {
const dir = await readdir(path, { withFileTypes: true }); const dir = await readdir(path, { withFileTypes: true });
for (const file of dir) { for (const file of dir) {
@ -95,14 +116,35 @@ async function recurseDirectory(reports, path) {
if (!file.name.endsWith(".html")) continue; if (!file.name.endsWith(".html")) continue;
const content = await readFile(filePath, "utf-8"); const content = await readFile(filePath, "utf-8");
console.time(filePath); console.time(filePath);
reports.push(await processFile(content)); reports[path] = await processFile(content);
console.timeEnd(filePath); console.timeEnd(filePath);
} }
} }
} }
// Scan the target directory, then print a summary of everything found.
await recurseDirectory(reports, dirPath);

// Flatten the findings of every report into a single list.
const totalThings = Object.values(reports).flatMap((r) => r.things);

// Tally findings per type in one pass. A Map iterates in insertion order, so
// kinds are printed in the order they were first encountered.
const countByKind = new Map();
for (const { type } of totalThings) {
  countByKind.set(type, (countByKind.get(type) ?? 0) + 1);
}
const countOf = (kind) => countByKind.get(kind) ?? 0;

// `reports` is keyed by path, so it has no `.length`; count its keys instead.
const fileCount = Object.keys(reports).length;
console.log(
  `Finished with ${fileCount} files read, ${totalThings.length} things`
);
for (const [kind, count] of countByKind) {
  console.log(`==> ${kind}: ${count}`);
}

console.log("This means:");
const pathBasedCount = countOf("media-absolute") + countOf("media-http");
console.log(
  `==> ${pathBasedCount} problems that affect users using legacy IPFS gateways`
);
const mediaHttp = countOf("media-http");
console.log(
  `==> ${mediaHttp} problems that make the website not self-contained, making it miss content if HTTP is unavailable`
);
const linkHttp = countOf("link-http");
console.log(
  `==> ${linkHttp} links to HTTP sites, which is not a real concern unless it's a key part of the site's navigation.`
);