import { Parser } from "htmlparser2";
import { DomHandler } from "domhandler";
import { performance } from "perf_hooks";

// Drop-in replacement for `log` when the report output should be silenced.
const noop = () => {};
const { log } = console;

// Matches URLs that start with http://, https:// or the protocol-relative "//".
// Case-insensitive because URI schemes are case-insensitive.
const HTTP_URL_PATTERN = /^(https?:\/\/|\/\/)/i;

// Tags whose `src` attribute is checked.
const MEDIA_TAGS = new Set(["audio", "video", "img"]);

/**
 * Reads all of standard input as a UTF-8 string.
 *
 * @returns {Promise<string>} Resolves with the complete input once stdin ends;
 *   rejects if the stream emits an error.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    let buffer = "";
    // Decode at the stream level so a multi-byte character that is split
    // across two chunks is not corrupted.
    process.stdin.setEncoding("utf8");
    process.stdin.on("data", (chunk) => {
      buffer += chunk;
    });
    process.stdin.on("end", () => resolve(buffer));
    process.stdin.on("error", reject);
  });
}

const rawHtml = await readStdin();

/**
 * Tells whether a URL is an HTTP(S) or protocol-relative URL.
 *
 * @param {string} url - URL to test.
 * @returns {boolean} True when `url` starts with http://, https:// or "//".
 */
function isHttp(url) {
  return HTTP_URL_PATTERN.test(url);
}

/**
 * Tells whether a URL starts with "/".
 *
 * Protocol-relative URLs ("//host/...") also start with "/"; callers that
 * need to tell them apart must check `isHttp` first.
 *
 * @param {string} url - URL to test.
 * @returns {boolean} True when `url` starts with "/".
 */
function isAbsolute(url) {
  return url.startsWith("/");
}

/**
 * Collects the text content of an element by concatenating the trimmed text
 * of every text node below it, in document order.
 *
 * Despite its name this returns text, not markup. Elements that have no text
 * descendants yield an empty string.
 *
 * @param {object} el - Element node exposing a `children` array.
 * @returns {string} The concatenated, trimmed text.
 */
function getHtml(el) {
  let text = "";
  for (const child of el.children) {
    if (child.type === "text") {
      text += child.data.trim();
    } else if (child.type === "tag") {
      text += getHtml(child);
    }
  }
  return text;
}

/**
 * Logs the element when its URL is HTTP(S)/protocol-relative or starts
 * with "/". Any other URL produces no output.
 *
 * @param {string} type - Label used in the log message (e.g. "link", "img").
 * @param {object} el - Element the URL belongs to.
 * @param {string} url - URL to classify.
 */
function checkUrl(type, el, url) {
  if (isHttp(url)) {
    log(`HTTP/S ${type}:`, getHtml(el));
  } else if (isAbsolute(url)) {
    log(`Absolute ${type}:`, getHtml(el));
  }
}

/**
 * Walks an element and its descendant tags depth-first, checking `href` on
 * `<a>` elements and `src` on audio/video/img elements, and logging elements
 * where that attribute is missing or empty.
 *
 * @param {object} el - Element node to inspect.
 */
function recursive(el) {
  if (el.name === "a") {
    if (el.attribs.href) {
      checkUrl("link", el, el.attribs.href);
    } else {
      log("Link with no href:", getHtml(el));
    }
  }

  if (MEDIA_TAGS.has(el.name)) {
    if (el.attribs.src) {
      checkUrl(el.name, el, el.attribs.src);
    } else {
      log(`${el.name} with no src:`, getHtml(el));
    }
  }

  for (const child of el.children) {
    if (child.type === "tag") {
      recursive(child);
    }
  }
}

const handler = new DomHandler(
  (error, dom) => {
    if (error) {
      // Report the failure instead of ending silently with no output.
      console.error("Failed to parse HTML:", error);
      process.exitCode = 1;
      return;
    }

    // Time the traversal of the parsed document.
    console.time();
    for (const el of dom) {
      if (el.type === "tag") {
        recursive(el);
      }
    }
    console.timeEnd();
  },
  { withEndIndices: true, withStartIndices: true }
);

const parser = new Parser(handler);
parser.parseComplete(rawHtml);