Compare commits

...

6 commits

Author SHA1 Message Date
93eaa423e1 Ignore npmrc 2022-11-25 19:08:30 -03:00
119969fcc1 Collect reports 2022-11-25 19:07:13 -03:00
1150850fed Allow href=""
It means going to index
2022-11-25 19:05:45 -03:00
3eb55ca4dd Only make HTTP/s links as info messages 2022-11-25 19:05:24 -03:00
13ef15c275 Rename to @nulo/site-analyzer to publish in Gitea 2022-11-25 18:50:21 -03:00
d0bd26b959 Recurse directory instead of pulling stdin 2022-11-25 18:47:57 -03:00
3 changed files with 66 additions and 41 deletions

3
.gitignore vendored
View file

@ -1,3 +1,6 @@
node_modules/
*.html
# Contains publish secrets
.npmrc

102
index.js
View file

@ -1,41 +1,43 @@
import { Parser } from "htmlparser2";
import { DomHandler } from "domhandler";
import { readdir, readFile } from "fs/promises";
import { join } from "path";
const noop = () => {};
// const log = noop;
const { log } = console;
const { argv } = process;
const dirPath = argv[2] || ".";
const stdin = await readStdin();
const rawHtml = stdin;
function recursive(el) {
function recurseElement(report, el) {
if (el.name === "a") {
if (el.attribs.href) {
checkUrl("link", el, el.attribs.href);
if (el.attribs.href !== undefined) {
if (isHttp(el.attribs.href)) {
report.infos.push(`HTTP/S link: ${getHtml(el)}`);
} else if (isAbsolute(el.attribs.href)) {
report.warnings.push(`Absolute link: ${getHtml(el)}`);
}
} else {
log("Link with no href:", getHtml(el));
report.warnings.push(`Link with no href: ${getHtml(el)}`);
}
}
if (["audio", "video", "img"].includes(el.name)) {
if (el.attribs.src) {
checkUrl(el.name, el, el.attribs.src);
checkUrl(report, el.name, el, el.attribs.src);
} else {
log(`${el.name} with no src:`, getHtml(el));
report.warnings.push(`${el.name} with no src: ${getHtml(el)}`);
}
}
for (const child of el.children) {
if (child.type === "tag") {
recursive(child);
recurseElement(report, child);
}
}
}
function checkUrl(type, el, url) {
function checkUrl(report, type, el, url) {
if (isHttp(url)) {
log(`HTTP/S ${type}:`, getHtml(el));
report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
} else if (isAbsolute(url)) {
log(`Absolute ${type}:`, getHtml(el));
report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
}
}
@ -57,30 +59,50 @@ function getHtml(el) {
return text;
}
const handler = new DomHandler(
(error, dom) => {
if (error) {
// TODO: Handle error
} else {
console.time();
for (const el of dom) {
if (el.type === "tag") {
recursive(el);
}
}
console.timeEnd();
}
},
{ withEndIndices: true, withStartIndices: true }
);
const parser = new Parser(handler);
parser.parseComplete(rawHtml);
function readStdin() {
function processFile(content) {
return new Promise((resolve, reject) => {
let buffer = "";
process.stdin.resume();
process.stdin.on("data", (d) => (buffer = buffer.concat(d.toString())));
process.stdin.on("close", () => resolve(buffer));
const handler = new DomHandler(
(error, dom) => {
if (error) {
reject(error);
} else {
let report = {
warnings: [],
infos: [],
};
for (const el of dom) {
if (el.type === "tag") {
recurseElement(report, el);
}
}
resolve(report);
}
},
{ withEndIndices: true, withStartIndices: true }
);
const parser = new Parser(handler);
parser.parseComplete(content);
});
}
// Accumulates one report object per processed HTML file. The binding is never
// reassigned (the array is only mutated via push), so it is declared const.
const reports = [];
/**
 * Walks `path` depth-first and appends one report per `.html` file to `reports`.
 *
 * Subdirectories are descended into before moving on to the next entry.
 * Entries whose name does not end in ".html" are skipped. Each matching file
 * is read as UTF-8 and handed to `processFile`; the processing step is timed
 * with `console.time`/`console.timeEnd`, using the file path as the label.
 *
 * @param {Array} reports - Accumulator; the result of each `processFile` call is pushed onto it.
 * @param {string} path - Directory to scan.
 * @returns {Promise<void>} Resolves once every entry under `path` has been visited.
 */
async function recurseDirectory(reports, path) {
  const entries = await readdir(path, { withFileTypes: true });
  for (const entry of entries) {
    const entryPath = join(path, entry.name);

    if (entry.isDirectory()) {
      await recurseDirectory(reports, entryPath);
      continue;
    }

    if (entry.name.endsWith(".html")) {
      const html = await readFile(entryPath, "utf-8");
      // Only the parse/analysis step is timed, not the file read.
      console.time(entryPath);
      const report = await processFile(html);
      reports.push(report);
      console.timeEnd(entryPath);
    }
  }
}
// Scan the target directory, then print a summary followed by every warning
// gathered across all reports.
await recurseDirectory(reports, dirPath);
const totalWarnings = reports.flatMap((report) => report.warnings);
const summary = `Finished with ${reports.length} files read, ${totalWarnings.length} warnings`;
console.log(summary, totalWarnings);

View file

@ -1,5 +1,5 @@
{
"name": "site-analyzer",
"name": "@nulo/site-analyzer",
"type": "module",
"version": "0.0.1",
"description": "",