2022-11-25 21:05:58 +00:00
|
|
|
import { Parser } from "htmlparser2";
|
|
|
|
import { DomHandler } from "domhandler";
|
|
|
|
|
|
|
|
// Silent stand-in for `log`: accepts any arguments and does nothing.
const noop = () => undefined;

// Findings are printed through `log`. To silence them, switch to the line
// below instead of the console binding.
// const log = noop;
const log = console.log;

// Top-level await: the complete input is buffered before anything else runs.
const stdin = await readStdin();

// The HTML document under inspection (exactly what arrived on stdin).
const rawHtml = stdin;
|
|
|
|
|
|
|
|
/**
 * Walks an element and its descendant tags in document order, reporting
 * links and media elements through `checkUrl` / `log`.
 *
 * - `<a>`: the `href` is checked as a "link"; a missing/empty `href` is logged.
 * - `<audio>`, `<video>`, `<img>`: the `src` is checked under the element's
 *   own tag name; a missing/empty `src` is logged.
 *
 * @param {object} el - Node with `name`, `attribs` and `children` properties.
 */
function recursive(el) {
  const { name, children } = el;

  if (name === "a") {
    const { href } = el.attribs;
    if (!href) {
      log("Link with no href:", getHtml(el));
    } else {
      checkUrl("link", el, href);
    }
  }

  const isMedia = name === "audio" || name === "video" || name === "img";
  if (isMedia) {
    const { src } = el.attribs;
    if (!src) {
      log(`${name} with no src:`, getHtml(el));
    } else {
      checkUrl(name, el, src);
    }
  }

  // Only nodes typed "tag" are descended into; every other node type is skipped.
  children
    .filter((node) => node.type === "tag")
    .forEach((node) => recursive(node));
}
|
|
|
|
|
2022-11-25 21:08:54 +00:00
|
|
|
/**
 * Logs a finding when `url` is either an HTTP(S)/protocol-relative URL or a
 * root-relative ("absolute") path. Any other URL produces no output.
 *
 * @param {string} type - Label used in the message (e.g. "link", "img").
 * @param {object} el - Element the URL belongs to; described via `getHtml`.
 * @param {string} url - Attribute value to classify.
 */
function checkUrl(type, el, url) {
  // The HTTP test must run first: "//host/path" also starts with a slash.
  if (isHttp(url)) {
    log(`HTTP/S ${type}:`, getHtml(el));
    return;
  }

  if (!isAbsolute(url)) return;

  log(`Absolute ${type}:`, getHtml(el));
}
|
|
|
|
|
|
|
|
/**
 * Tells whether a URL points at an HTTP or HTTPS resource.
 *
 * Matches an explicit `http://` / `https://` prefix as well as a
 * protocol-relative `//` prefix. The scheme comparison is case-insensitive
 * because URL schemes are case-insensitive (`HTTP://…` and `Https://…`
 * are the same scheme as `http://…` / `https://…`).
 *
 * @param {string} url - URL string to test.
 * @returns {boolean} `true` when the URL starts with one of those prefixes.
 */
function isHttp(url) {
  const httpPrefix = /^(https?:\/\/|\/\/)/i;
  return httpPrefix.test(url);
}
|
|
|
|
/**
 * Tells whether a URL begins with a slash.
 *
 * @param {string} url - URL string to test.
 * @returns {boolean} `true` when the first character is "/".
 */
function isAbsolute(url) {
  const firstChar = url.charAt(0);
  return firstChar === "/";
}
|
|
|
|
|
|
|
|
/**
 * Produces a short, human-readable description of an element for log output.
 *
 * The description is the element's text content: every descendant text node,
 * trimmed and concatenated in document order. When the element has no text
 * at all (for example an `<img>`, or a link that only wraps an image), the
 * result falls back to the element's opening tag rebuilt from its name and
 * attributes, so the log line still identifies the element.
 *
 * @param {object} el - Node with `name`, `attribs` and `children` properties.
 * @returns {string} Text content, or an opening-tag description if empty.
 */
function getHtml(el) {
  const text = collectText(el);
  if (text !== "") return text;

  // Diagnostic output only: attribute values are shown as-is, not escaped.
  const attributes = Object.entries(el.attribs ?? {})
    .map(([key, value]) => ` ${key}="${value}"`)
    .join("");
  return `<${el.name}${attributes}>`;
}

/**
 * Concatenates the trimmed text of all text nodes below `el`, descending
 * only into children typed "tag".
 *
 * @param {object} el - Node with a `children` array.
 * @returns {string} Joined text, or "" when there is none.
 */
function collectText(el) {
  let text = "";
  for (const child of el.children) {
    if (child.type === "text") text += child.data.trim();
    else if (child.type === "tag") text += collectText(child);
  }
  return text;
}
|
|
|
|
|
2022-11-25 21:05:58 +00:00
|
|
|
// Callback invoked by DomHandler with either a parse error or the list of
// top-level nodes of the parsed document.
const handler = new DomHandler(
  (error, dom) => {
    if (error) {
      // Report the failure instead of silently producing no findings, and
      // make the process exit with a non-zero status.
      console.error("Failed to parse HTML:", error);
      process.exitCode = 1;
      return;
    }

    // Time the scan; console.time()/timeEnd() use the default label.
    console.time();
    for (const el of dom) {
      // Only top-level nodes typed "tag" are inspected.
      if (el.type === "tag") {
        recursive(el);
      }
    }
    console.timeEnd();
  },
  // Options kept from the original configuration.
  { withEndIndices: true, withStartIndices: true }
);

// Feed the whole document to the parser in one go.
const parser = new Parser(handler);
parser.parseComplete(rawHtml);
|
2022-11-25 21:08:54 +00:00
|
|
|
|
|
|
|
/**
 * Reads everything from standard input and returns it as one string.
 *
 * The stream is switched to UTF-8 string mode so that a multi-byte character
 * split across two chunks is decoded correctly (converting each chunk to a
 * string separately would corrupt it). Iterating the stream
 * with `for await` finishes when the input ends and throws if the stream
 * emits an error, so the returned promise rejects instead of hanging.
 *
 * @returns {Promise<string>} Resolves with the complete input text.
 */
async function readStdin() {
  process.stdin.setEncoding("utf8");

  let buffer = "";
  for await (const chunk of process.stdin) {
    buffer += chunk;
  }
  return buffer;
}
|