From d0bd26b9593c5d2f80c6ad5aea3688a1f4a91826 Mon Sep 17 00:00:00 2001 From: Nulo Date: Fri, 25 Nov 2022 18:47:57 -0300 Subject: [PATCH] Recurse directory instead of pulling stdin --- index.js | 89 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/index.js b/index.js index c82c48e..461bb5c 100644 --- a/index.js +++ b/index.js @@ -1,41 +1,39 @@ import { Parser } from "htmlparser2"; import { DomHandler } from "domhandler"; +import { readdir, readFile } from "fs/promises"; +import { join } from "path"; -const noop = () => {}; -// const log = noop; -const { log } = console; +const { argv } = process; +const dirPath = argv[2] || "."; -const stdin = await readStdin(); -const rawHtml = stdin; - -function recursive(el) { +function recurseElement(report, el) { if (el.name === "a") { if (el.attribs.href) { - checkUrl("link", el, el.attribs.href); + checkUrl(report, "link", el, el.attribs.href); } else { - log("Link with no href:", getHtml(el)); + report.warnings.push(`Link with no href: ${getHtml(el)}`); } } if (["audio", "video", "img"].includes(el.name)) { if (el.attribs.src) { - checkUrl(el.name, el, el.attribs.src); + checkUrl(report, el.name, el, el.attribs.src); } else { - log(`${el.name} with no src:`, getHtml(el)); + report.warnings.push(`${el.name} with no src: ${getHtml(el)}`); } } for (const child of el.children) { if (child.type === "tag") { - recursive(child); + recurseElement(report, child); } } } -function checkUrl(type, el, url) { +function checkUrl(report, type, el, url) { if (isHttp(url)) { - log(`HTTP/S ${type}:`, getHtml(el)); + report.warnings.push(`HTTP/S ${type}: ${getHtml(el)}`); } else if (isAbsolute(url)) { - log(`Absolute ${type}:`, getHtml(el)); + report.warnings.push(`Absolute ${type}: ${getHtml(el)}`); } } @@ -57,30 +55,43 @@ function getHtml(el) { return text; } -const handler = new DomHandler( - (error, dom) => { - if (error) { - // TODO: Handle error - } else { - console.time(); - for (const el of dom) { - if (el.type === "tag") { - recursive(el); - } - } - console.timeEnd(); - } - }, - { withEndIndices: true, withStartIndices: true } -); -const parser = new Parser(handler); -parser.parseComplete(rawHtml); - -function readStdin() { +function processFile(content) { return new Promise((resolve, reject) => { - let buffer = ""; - process.stdin.resume(); - process.stdin.on("data", (d) => (buffer = buffer.concat(d.toString()))); - process.stdin.on("close", () => resolve(buffer)); + const handler = new DomHandler( + (error, dom) => { + if (error) { + reject(error); + } else { + let report = { + warnings: [], + }; + for (const el of dom) { + if (el.type === "tag") { + recurseElement(report, el); + } + } + resolve(report); + } + }, + { withEndIndices: true, withStartIndices: true } + ); + const parser = new Parser(handler); + parser.parseComplete(content); }); } + +async function recurseDirectory(path) { + const dir = await readdir(path, { withFileTypes: true }); + for (const file of dir) { + const filePath = join(path, file.name); + if (file.isDirectory()) recurseDirectory(filePath); + else { + if (!file.name.endsWith(".html")) continue; + const content = await readFile(filePath, "utf-8"); + console.time(filePath); + await processFile(content); + console.timeEnd(filePath); + } + } +} +await recurseDirectory(dirPath);