// site-analyzer/index.js
//
// 98 lines
// 2.5 KiB
// JavaScript
// Raw Normal View History
//
// 2022-11-25 21:05:58 +00:00
import { Parser } from "htmlparser2";
import { DomHandler } from "domhandler";
import { readdir, readFile } from "fs/promises";
import { join } from "path";
// Command-line arguments of the running Node.js process.
const { argv } = process;
// Directory to analyze: the first CLI argument, or the current directory when
// it is absent. `||` (rather than `??`) also maps an empty-string argument to ".".
const dirPath = argv[2] || ".";
/**
 * Walk a tag node and all of its descendant tags, recording link and media
 * problems in `report.warnings`.
 *
 * - `<a>`: a non-empty `href` is handed to `checkUrl` as a "link"; a missing
 *   or empty `href` produces a "Link with no href" warning.
 * - `<audio>`, `<video>`, `<img>`: a non-empty `src` is handed to `checkUrl`
 *   under the element's own tag name; a missing or empty `src` produces a
 *   "<name> with no src" warning.
 *
 * @param {{ warnings: string[] }} report - Accumulator, mutated in place.
 * @param {object} el - Tag node exposing `name`, `attribs` and `children`.
 * @returns {void}
 */
function recurseElement(report, el) {
  if (el.name === "a") {
    if (el.attribs.href) {
      checkUrl(report, "link", el, el.attribs.href);
    } else {
      report.warnings.push(`Link with no href: ${getHtml(el)}`);
    }
  }

  if (["audio", "video", "img"].includes(el.name)) {
    if (el.attribs.src) {
      checkUrl(report, el.name, el, el.attribs.src);
    } else {
      report.warnings.push(`${el.name} with no src: ${getHtml(el)}`);
    }
  }

  // Only tag nodes are descended into; every other child type is skipped.
  for (const child of el.children) {
    if (child.type === "tag") {
      recurseElement(report, child);
    }
  }
}
/**
 * Classify a URL found on an element and record a warning when it is not a
 * relative reference.
 *
 * `isHttp` is tested before `isAbsolute` on purpose: a protocol-relative URL
 * such as `//host/path` also starts with "/", and it must be reported as
 * "HTTP/S" rather than "Absolute".
 *
 * @param {{ warnings: string[] }} report - Accumulator, mutated in place.
 * @param {string} type - Label used in the warning text (e.g. "link", "img").
 * @param {object} el - Element the URL was read from; passed to `getHtml`.
 * @param {string} url - Attribute value to classify.
 * @returns {void}
 */
function checkUrl(report, type, el, url) {
  if (isHttp(url)) {
    report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
  } else if (isAbsolute(url)) {
    report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
  }
}
/**
 * Tell whether a URL points at an HTTP(S) origin: it starts with `http://`,
 * `https://`, or the protocol-relative prefix `//`.
 *
 * The match is case-insensitive because URI schemes are case-insensitive, so
 * `HTTP://example.com` is detected as well.
 *
 * @param {string} url - Attribute value to test.
 * @returns {boolean} True when the URL has one of the prefixes above.
 */
function isHttp(url) {
  return /^(?:https?:)?\/\//i.test(url);
}
/**
 * Tell whether a URL begins with a forward slash.
 *
 * Protocol-relative URLs (`//host/...`) satisfy this test too.
 *
 * @param {string} url - Attribute value to test.
 * @returns {boolean} True when the first character is "/".
 */
function isAbsolute(url) {
  const firstChar = url.charAt(0);
  return firstChar === "/";
}
/**
 * Build a short textual description of an element for use in warnings.
 *
 * Despite the name, no markup is produced: the result is the trimmed data of
 * every text node under `el`, concatenated in document order with no
 * separator. Nested tags are flattened recursively; children of any other
 * type contribute nothing.
 *
 * @param {object} el - Node exposing an iterable `children` collection.
 * @returns {string} Concatenated text, or "" when there is none.
 */
function getHtml(el) {
  const pieces = [];
  for (const node of el.children) {
    switch (node.type) {
      case "text":
        pieces.push(node.data.trim());
        break;
      case "tag":
        pieces.push(getHtml(node));
        break;
      default:
        break;
    }
  }
  return pieces.join("");
}
/**
 * Parse an HTML string and analyze every top-level tag of the resulting DOM.
 *
 * The callback-based `DomHandler` / `Parser` pair is adapted to a promise:
 * the handler callback either rejects with the error it receives or resolves
 * with a report filled in by `recurseElement`.
 *
 * @param {string} content - HTML source to analyze.
 * @returns {Promise<{ warnings: string[] }>} Report holding the warnings
 *   collected for this document.
 */
function processFile(content) {
  return new Promise((resolve, reject) => {
    const handler = new DomHandler(
      (error, dom) => {
        if (error) {
          reject(error);
          return;
        }

        const report = {
          warnings: [],
        };
        // Top-level nodes that are not tags are ignored.
        for (const el of dom) {
          if (el.type === "tag") {
            recurseElement(report, el);
          }
        }
        resolve(report);
      },
      // NOTE(review): start/end indices are requested, but nothing in this
      // function reads them — confirm whether they are still needed.
      { withEndIndices: true, withStartIndices: true }
    );
    const parser = new Parser(handler);
    parser.parseComplete(content);
  });
}
/**
 * Analyze every `.html` file under a directory, descending into
 * subdirectories, and time each file's analysis with `console.time`.
 *
 * Entries are handled one at a time. The recursive call is awaited so that
 * the returned promise settles only after the whole subtree has been
 * processed, and so that a failure in a nested directory rejects this promise
 * instead of surfacing as an unhandled rejection.
 *
 * @param {string} path - Directory to scan.
 * @returns {Promise<void>} Settles once every entry below `path` is handled.
 */
async function recurseDirectory(path) {
  const entries = await readdir(path, { withFileTypes: true });
  for (const entry of entries) {
    const filePath = join(path, entry.name);

    if (entry.isDirectory()) {
      await recurseDirectory(filePath);
      continue;
    }

    if (!entry.name.endsWith(".html")) continue;

    const content = await readFile(filePath, "utf-8");
    console.time(filePath);
    // NOTE(review): the report returned by processFile is discarded here, so
    // collected warnings are never shown — confirm whether that is intended.
    await processFile(content);
    console.timeEnd(filePath);
  }
}
// Entry point: scan dirPath for .html files and analyze each one. Uses
// top-level await, so this file must be loaded as an ES module.
await recurseDirectory(dirPath);