import { Parser } from "htmlparser2";
import { DomHandler } from "domhandler";
import { readdir, readFile } from "fs/promises";
import { join } from "path";

const { argv } = process;

// Directory to scan: the first CLI argument, or "." when it is missing or empty.
const dirPath = argv[2] || ".";

// Matches URLs starting with "http://", "https://" or the protocol-relative "//".
const HTTP_URL_PATTERN = /^(https?:\/\/|\/\/)/;

// Tags whose `src` attribute is checked by recurseElement.
const MEDIA_TAGS = ["audio", "video", "img"];

/**
 * Inspects one element and all of its descendant tags, appending findings
 * to `report`.
 *
 * - `<a>`: an HTTP/S (or protocol-relative) href is recorded as an info, a
 *   root-absolute href as a warning, and a missing href as a warning.
 * - `<audio>`, `<video>`, `<img>`: the `src` is passed to checkUrl; a missing
 *   or empty `src` is recorded as a warning.
 *
 * @param {{warnings: string[], infos: string[]}} report - Mutated in place.
 * @param {object} el - Parsed tag node; read for `name`, `attribs`, `children`.
 */
function recurseElement(report, el) {
  if (el.name === "a") {
    const { href } = el.attribs;
    // Only a truly absent attribute counts as "no href"; an empty string does not.
    if (href === undefined) {
      report.warnings.push(`Link with no href: ${getHtml(el)}`);
    } else if (isHttp(href)) {
      report.infos.push(`HTTP/S link: ${getHtml(el)}`);
    } else if (isAbsolute(href)) {
      report.warnings.push(`Absolute link: ${getHtml(el)}`);
    }
  }

  if (MEDIA_TAGS.includes(el.name)) {
    // Truthiness check on purpose: an empty `src` is reported like a missing one.
    if (el.attribs.src) {
      checkUrl(report, el.name, el, el.attribs.src);
    } else {
      report.warnings.push(`${el.name} with no src: ${getHtml(el)}`);
    }
  }

  for (const child of el.children) {
    if (child.type === "tag") {
      recurseElement(report, child);
    }
  }
}

/**
 * Adds a warning to `report` when `url` is HTTP/S (or protocol-relative) or
 * root-absolute. Relative URLs produce no entry.
 *
 * @param {{warnings: string[], infos: string[]}} report - Mutated in place.
 * @param {string} type - Label used in the message (callers pass the tag name).
 * @param {object} el - Element the URL belongs to; used for the message text.
 * @param {string} url - URL to classify.
 */
function checkUrl(report, type, el, url) {
  if (isHttp(url)) {
    report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
  } else if (isAbsolute(url)) {
    report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
  }
}

/**
 * @param {string} url
 * @returns {boolean} True when `url` starts with "http://", "https://" or "//".
 */
function isHttp(url) {
  return HTTP_URL_PATTERN.test(url);
}

/**
 * @param {string} url
 * @returns {boolean} True when `url` starts with "/". Protocol-relative URLs
 *   also match, so callers test isHttp first.
 */
function isAbsolute(url) {
  return url.startsWith("/");
}

/**
 * Builds a short description of an element for report messages.
 *
 * Despite the name, this returns the concatenated, trimmed text of the
 * element's text nodes (recursing into child tags), not its raw markup.
 *
 * @param {object} el - Parsed tag node; read for `children`.
 * @returns {string} Concatenated text content, possibly empty.
 */
function getHtml(el) {
  let text = "";
  for (const child of el.children) {
    if (child.type === "text") {
      text += child.data.trim();
    } else if (child.type === "tag") {
      text += getHtml(child);
    }
  }
  return text;
}

/**
 * Parses an HTML string and collects link/media findings for every tag in it.
 *
 * @param {string} content - HTML source text.
 * @returns {Promise<{warnings: string[], infos: string[]}>} Resolves with the
 *   report; rejects with the error passed to the DomHandler callback.
 */
function processFile(content) {
  // `new Promise` is appropriate here: it adapts DomHandler's callback API.
  return new Promise((resolve, reject) => {
    const handler = new DomHandler(
      (error, dom) => {
        if (error) {
          reject(error);
          return;
        }
        const report = {
          warnings: [],
          infos: [],
        };
        for (const el of dom) {
          if (el.type === "tag") {
            recurseElement(report, el);
          }
        }
        resolve(report);
      },
      // Index tracking is requested, although getHtml does not currently read
      // startIndex/endIndex.
      { withEndIndices: true, withStartIndices: true }
    );
    const parser = new Parser(handler);
    parser.parseComplete(content);
  });
}

/**
 * Walks `path` recursively and runs processFile on every ".html" file,
 * printing the time spent per file via console.time/console.timeEnd.
 * The report returned by processFile is not used here; only timing is emitted.
 *
 * @param {string} path - Directory to scan.
 * @returns {Promise<void>} Resolves once the directory and all of its
 *   subdirectories have been processed; rejects on the first read/parse error.
 */
async function recurseDirectory(path) {
  const dir = await readdir(path, { withFileTypes: true });
  for (const file of dir) {
    const filePath = join(path, file.name);
    if (file.isDirectory()) {
      // Awaited so errors in subdirectories propagate to the caller instead of
      // becoming unhandled rejections, and so timings are not interleaved.
      await recurseDirectory(filePath);
      continue;
    }
    if (!file.name.endsWith(".html")) continue;

    const content = await readFile(filePath, "utf-8");
    console.time(filePath);
    await processFile(content);
    console.timeEnd(filePath);
  }
}

await recurseDirectory(dirPath);