Compare commits
6 commits
86885be850
...
93eaa423e1
Author | SHA1 | Date | |
---|---|---|---|
93eaa423e1 | |||
119969fcc1 | |||
1150850fed | |||
3eb55ca4dd | |||
13ef15c275 | |||
d0bd26b959 |
3 changed files with 66 additions and 41 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,6 @@
|
|||
node_modules/
|
||||
|
||||
*.html
|
||||
|
||||
# Contains publish secrets
|
||||
.npmrc
|
||||
|
|
102
index.js
102
index.js
|
@ -1,41 +1,43 @@
|
|||
import { Parser } from "htmlparser2";
|
||||
import { DomHandler } from "domhandler";
|
||||
import { readdir, readFile } from "fs/promises";
|
||||
import { join } from "path";
|
||||
|
||||
const noop = () => {};
|
||||
// const log = noop;
|
||||
const { log } = console;
|
||||
const { argv } = process;
|
||||
const dirPath = argv[2] || ".";
|
||||
|
||||
const stdin = await readStdin();
|
||||
const rawHtml = stdin;
|
||||
|
||||
function recursive(el) {
|
||||
function recurseElement(report, el) {
|
||||
if (el.name === "a") {
|
||||
if (el.attribs.href) {
|
||||
checkUrl("link", el, el.attribs.href);
|
||||
if (el.attribs.href !== undefined) {
|
||||
if (isHttp(el.attribs.href)) {
|
||||
report.infos.push(`HTTP/S link: ${getHtml(el)}`);
|
||||
} else if (isAbsolute(el.attribs.href)) {
|
||||
report.warnings.push(`Absolute link: ${getHtml(el)}`);
|
||||
}
|
||||
} else {
|
||||
log("Link with no href:", getHtml(el));
|
||||
report.warnings.push(`Link with no href: ${getHtml(el)}`);
|
||||
}
|
||||
}
|
||||
if (["audio", "video", "img"].includes(el.name)) {
|
||||
if (el.attribs.src) {
|
||||
checkUrl(el.name, el, el.attribs.src);
|
||||
checkUrl(report, el.name, el, el.attribs.src);
|
||||
} else {
|
||||
log(`${el.name} with no src:`, getHtml(el));
|
||||
report.warnings.push(`${el.name} with no src: ${getHtml(el)}`);
|
||||
}
|
||||
}
|
||||
|
||||
for (const child of el.children) {
|
||||
if (child.type === "tag") {
|
||||
recursive(child);
|
||||
recurseElement(report, child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function checkUrl(type, el, url) {
|
||||
function checkUrl(report, type, el, url) {
|
||||
if (isHttp(url)) {
|
||||
log(`HTTP/S ${type}:`, getHtml(el));
|
||||
report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
|
||||
} else if (isAbsolute(url)) {
|
||||
log(`Absolute ${type}:`, getHtml(el));
|
||||
report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -57,30 +59,50 @@ function getHtml(el) {
|
|||
return text;
|
||||
}
|
||||
|
||||
const handler = new DomHandler(
|
||||
(error, dom) => {
|
||||
if (error) {
|
||||
// TODO: Handle error
|
||||
} else {
|
||||
console.time();
|
||||
for (const el of dom) {
|
||||
if (el.type === "tag") {
|
||||
recursive(el);
|
||||
}
|
||||
}
|
||||
console.timeEnd();
|
||||
}
|
||||
},
|
||||
{ withEndIndices: true, withStartIndices: true }
|
||||
);
|
||||
const parser = new Parser(handler);
|
||||
parser.parseComplete(rawHtml);
|
||||
|
||||
function readStdin() {
|
||||
function processFile(content) {
|
||||
return new Promise((resolve, reject) => {
|
||||
let buffer = "";
|
||||
process.stdin.resume();
|
||||
process.stdin.on("data", (d) => (buffer = buffer.concat(d.toString())));
|
||||
process.stdin.on("close", () => resolve(buffer));
|
||||
const handler = new DomHandler(
|
||||
(error, dom) => {
|
||||
if (error) {
|
||||
reject(error);
|
||||
} else {
|
||||
let report = {
|
||||
warnings: [],
|
||||
infos: [],
|
||||
};
|
||||
for (const el of dom) {
|
||||
if (el.type === "tag") {
|
||||
recurseElement(report, el);
|
||||
}
|
||||
}
|
||||
resolve(report);
|
||||
}
|
||||
},
|
||||
{ withEndIndices: true, withStartIndices: true }
|
||||
);
|
||||
const parser = new Parser(handler);
|
||||
parser.parseComplete(content);
|
||||
});
|
||||
}
|
||||
|
||||
let reports = [];
|
||||
async function recurseDirectory(reports, path) {
|
||||
const dir = await readdir(path, { withFileTypes: true });
|
||||
for (const file of dir) {
|
||||
const filePath = join(path, file.name);
|
||||
if (file.isDirectory()) await recurseDirectory(reports, filePath);
|
||||
else {
|
||||
if (!file.name.endsWith(".html")) continue;
|
||||
const content = await readFile(filePath, "utf-8");
|
||||
console.time(filePath);
|
||||
reports.push(await processFile(content));
|
||||
console.timeEnd(filePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
await recurseDirectory(reports, dirPath);
|
||||
const totalWarnings = reports.map((r) => r.warnings).flat();
|
||||
console.log(
|
||||
`Finished with ${reports.length} files read, ${totalWarnings.length} warnings`,
|
||||
totalWarnings
|
||||
);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"name": "site-analyzer",
|
||||
"name": "@nulo/site-analyzer",
|
||||
"type": "module",
|
||||
"version": "0.0.1",
|
||||
"description": "",
|
||||
|
|
Loading…
Reference in a new issue