Recurse directory instead of pulling stdin
This commit is contained in:
parent
86885be850
commit
d0bd26b959
1 changed file with 50 additions and 39 deletions
89
index.js
89
index.js
|
@ -1,41 +1,39 @@
|
||||||
import { Parser } from "htmlparser2";
|
import { Parser } from "htmlparser2";
|
||||||
import { DomHandler } from "domhandler";
|
import { DomHandler } from "domhandler";
|
||||||
|
import { readdir, readFile } from "fs/promises";
|
||||||
|
import { join } from "path";
|
||||||
|
|
||||||
// Command-line input: the directory to scan is taken from the first CLI
// argument (argv[2]); when that argument is missing or empty, the current
// working directory (".") is used instead.
const { argv } = process;
const dirPath = argv[2] || ".";
|
/**
 * Walk an element and all of its tag descendants, recording a warning on
 * `report` for every link or media element that has no URL, and handing every
 * URL that is present to `checkUrl`.
 *
 * @param {{ warnings: string[] }} report - Collector; messages are pushed onto
 *   `report.warnings`.
 * @param {object} el - Tag node; its `name`, `attribs` and `children` are read.
 * @returns {void}
 */
function recurseElement(report, el) {
  const { name } = el;

  // Anchors: the URL lives in `href`.
  if (name === "a") {
    const { href } = el.attribs;
    if (href) {
      checkUrl(report, "link", el, href);
    } else {
      report.warnings.push(`Link with no href: ${getHtml(el)}`);
    }
  }

  // Media elements: the URL lives in `src`.
  if (["audio", "video", "img"].includes(name)) {
    const { src } = el.attribs;
    if (src) {
      checkUrl(report, name, el, src);
    } else {
      report.warnings.push(`${name} with no src: ${getHtml(el)}`);
    }
  }

  // Only tag children are descended into; other node types are skipped.
  for (const child of el.children) {
    if (child.type !== "tag") continue;
    recurseElement(report, child);
  }
}
|
||||||
|
|
||||||
/**
 * Record a warning on `report` when `url` is classified by the `isHttp` or
 * `isAbsolute` helpers (defined elsewhere in this file). `isHttp` is checked
 * first, so a URL matching both produces only the "HTTP/S" warning. URLs that
 * match neither helper produce no warning.
 *
 * @param {{ warnings: string[] }} report - Collector; messages are pushed onto
 *   `report.warnings`.
 * @param {string} type - Label used in the message (e.g. "link" or a tag name).
 * @param {object} el - Element the URL came from; rendered via `getHtml`.
 * @param {string} url - URL value taken from the element's attribute.
 * @returns {void}
 */
function checkUrl(report, type, el, url) {
  if (isHttp(url)) {
    report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`);
    return;
  }

  if (isAbsolute(url)) {
    report.warnings.push(`Absolute ${type}: ${getHtml(el)}`);
  }
}
|
||||||
|
|
||||||
|
@ -57,30 +55,43 @@ function getHtml(el) {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Parse one HTML document and collect the warnings found in it.
 *
 * The content is parsed with a `Parser` driven by a `DomHandler`; once the
 * handler's callback delivers the DOM, every top-level tag node is walked with
 * `recurseElement`, which appends its findings to a fresh report.
 *
 * @param {string} content - HTML source handed to `parser.parseComplete`.
 * @returns {Promise<{ warnings: string[] }>} Resolves with the report for this
 *   document; rejects with the error passed to the handler callback, if any.
 */
function processFile(content) {
  return new Promise((resolve, reject) => {
    const onDom = (error, dom) => {
      if (error) {
        reject(error);
        return;
      }

      // One report per document, so findings from different files never mix.
      const report = { warnings: [] };
      for (const node of dom) {
        if (node.type === "tag") {
          recurseElement(report, node);
        }
      }
      resolve(report);
    };

    const handler = new DomHandler(onDom, {
      withEndIndices: true,
      withStartIndices: true,
    });
    const parser = new Parser(handler);
    parser.parseComplete(content);
  });
}
|
||||||
|
|
||||||
|
/**
 * Recursively walk `path` and run `processFile` on the content of every file
 * whose name ends with ".html", timing each file with `console.time` /
 * `console.timeEnd` under the file's path as the label.
 *
 * Entries are handled one at a time, in the order `readdir` returns them. The
 * returned promise settles only after every nested directory has been walked.
 *
 * @param {string} path - Directory to scan.
 * @returns {Promise<void>} Resolves once the whole tree has been processed;
 *   rejects with the first error raised by `readdir`, `readFile` or
 *   `processFile`.
 */
async function recurseDirectory(path) {
  const entries = await readdir(path, { withFileTypes: true });

  for (const entry of entries) {
    const entryPath = join(path, entry.name);

    if (entry.isDirectory()) {
      // Await the nested walk: without it the promise floats, so the caller
      // would resolve before subdirectories finish and any failure inside
      // them would surface as an unhandled rejection.
      await recurseDirectory(entryPath);
      continue;
    }

    if (!entry.name.endsWith(".html")) continue;

    const content = await readFile(entryPath, "utf-8");
    console.time(entryPath);
    try {
      // NOTE(review): the report resolved by processFile is not used here —
      // confirm whether its warnings are meant to be printed or returned.
      await processFile(content);
    } finally {
      // Always close the timer, even when processing this file fails.
      console.timeEnd(entryPath);
    }
  }
}
|
||||||
|
// Entry point: start the scan at dirPath; the top-level await keeps the module
// waiting on the promise that recurseDirectory returns.
await recurseDirectory(dirPath);
|
||||||
|
|
Loading…
Reference in a new issue