This commit is contained in:
Cat /dev/Nulo 2022-11-25 18:08:54 -03:00
parent ad0ef24257
commit 7f4649df37

View file

@ -6,43 +6,9 @@ const noop = () => {};
// Logging toggle: enable the commented-out line (and drop the one after it)
// to route every `log(...)` call to `noop` and silence the report.
// const log = noop;
const { log } = console;
/**
 * Reads all of standard input and resolves with it as a single UTF-8 string.
 *
 * @returns {Promise<string>} Everything received on stdin, decoded as UTF-8.
 *   Rejects if the stdin stream emits an "error" event.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const chunks = [];
    // Resolving twice is harmless: a promise settles only once.
    const finish = () => resolve(chunks.join(""));
    // Let the stream do the decoding: calling toString() on each Buffer chunk
    // corrupts multi-byte characters that straddle a chunk boundary.
    process.stdin.setEncoding("utf8");
    process.stdin.on("data", (chunk) => chunks.push(chunk));
    process.stdin.once("end", finish);
    // "close" stays as a fallback for a stream torn down without "end".
    process.stdin.once("close", finish);
    process.stdin.once("error", reject);
    process.stdin.resume();
  });
}
// Top-level await: wait until stdin has been read completely so the input is
// available as one string.
const stdin = await readStdin();
// `rawHtml` is the same string under the name passed to the parser.
const rawHtml = stdin;
/**
 * Tells whether a URL starts with `http://`, `https://`, or the
 * protocol-relative prefix `//`.
 *
 * @param {string} url - Raw URL string to inspect.
 * @returns {boolean} `true` when the URL starts with one of those prefixes.
 */
function isHttp(url) {
  // URL schemes are case-insensitive (RFC 3986, section 3.1), so `HTTP://`
  // and `Https://` must be recognised as well.
  return /^(?:https?:)?\/\//i.test(url);
}
/**
 * Tells whether a URL begins with a slash.
 *
 * @param {string} url - Raw URL string to inspect.
 * @returns {boolean} `true` when the first character is `/`.
 */
function isAbsolute(url) {
  const firstChar = url.charAt(0);
  return firstChar === "/";
}
/**
 * Collects the text found under an element into one string.
 *
 * Despite the name, no markup is returned: every "text" child contributes its
 * trimmed data, every "tag" child contributes its own collected text
 * (recursively), and any other child type is ignored.
 *
 * @param {{ children: Iterable<object> }} el - Node whose children are walked.
 * @returns {string} The trimmed text pieces joined with no separator.
 */
function getHtml(el) {
  const pieces = [];
  for (const node of el.children) {
    switch (node.type) {
      case "text":
        pieces.push(node.data.trim());
        break;
      case "tag":
        pieces.push(getHtml(node));
        break;
      default:
        // Other node types contribute nothing.
        break;
    }
  }
  return pieces.join("");
}
/**
 * Logs a line for an element whose URL is HTTP/S (or protocol-relative) or
 * starts with a slash; any other URL produces no output.
 *
 * @param {string} type - Label interpolated into the log message.
 * @param {object} el - Element whose collected text is logged.
 * @param {string} url - URL value to classify.
 */
function checkUrl(type, el, url) {
  // HTTP/S is tested first, so `//host/...` is reported as HTTP/S rather than
  // as an absolute path.
  if (isHttp(url)) {
    log(`HTTP/S ${type}:`, getHtml(el));
    return;
  }
  if (!isAbsolute(url)) {
    return;
  }
  log(`Absolute ${type}:`, getHtml(el));
}
function recursive(el) {
if (el.name === "a") {
if (el.attribs.href) {
@ -66,12 +32,37 @@ function recursive(el) {
}
}
/**
 * Logs a line for an element whose URL is HTTP/S (or protocol-relative) or
 * starts with a slash; any other URL produces no output.
 *
 * @param {string} type - Label interpolated into the log message.
 * @param {object} el - Element whose collected text is logged.
 * @param {string} url - URL value to classify.
 */
function checkUrl(type, el, url) {
  // HTTP/S is tested first, so `//host/...` is reported as HTTP/S rather than
  // as an absolute path.
  if (isHttp(url)) {
    log(`HTTP/S ${type}:`, getHtml(el));
    return;
  }
  if (!isAbsolute(url)) {
    return;
  }
  log(`Absolute ${type}:`, getHtml(el));
}
/**
 * Tells whether a URL starts with `http://`, `https://`, or the
 * protocol-relative prefix `//`.
 *
 * @param {string} url - Raw URL string to inspect.
 * @returns {boolean} `true` when the URL starts with one of those prefixes.
 */
function isHttp(url) {
  // URL schemes are case-insensitive (RFC 3986, section 3.1), so `HTTP://`
  // and `Https://` must be recognised as well.
  return /^(?:https?:)?\/\//i.test(url);
}
/**
 * Tells whether a URL begins with a slash.
 *
 * @param {string} url - Raw URL string to inspect.
 * @returns {boolean} `true` when the first character is `/`.
 */
function isAbsolute(url) {
  const firstChar = url.charAt(0);
  return firstChar === "/";
}
/**
 * Collects the text found under an element into one string.
 *
 * Despite the name, no markup is returned: every "text" child contributes its
 * trimmed data, every "tag" child contributes its own collected text
 * (recursively), and any other child type is ignored.
 *
 * @param {{ children: Iterable<object> }} el - Node whose children are walked.
 * @returns {string} The trimmed text pieces joined with no separator.
 */
function getHtml(el) {
  const pieces = [];
  for (const node of el.children) {
    switch (node.type) {
      case "text":
        pieces.push(node.data.trim());
        break;
      case "tag":
        pieces.push(getHtml(node));
        break;
      default:
        // Other node types contribute nothing.
        break;
    }
  }
  return pieces.join("");
}
const handler = new DomHandler(
(error, dom) => {
if (error) {
// TODO: Handle error
} else {
// for (let i = 0; i < 1000; i++) {
console.time();
for (const el of dom) {
if (el.type === "tag") {
@ -79,10 +70,18 @@ const handler = new DomHandler(
}
}
console.timeEnd();
// }
}
},
{ withEndIndices: true, withStartIndices: true }
);
// Wire a parser to the DOM-building handler defined above.
// NOTE(review): `Parser` and `DomHandler` are imported outside this view —
// presumably from htmlparser2 / domhandler; confirm.
const parser = new Parser(handler);
// Hand the entire input string to the parser in a single call.
parser.parseComplete(rawHtml);
/**
 * Reads all of standard input and resolves with it as a single UTF-8 string.
 *
 * @returns {Promise<string>} Everything received on stdin, decoded as UTF-8.
 *   Rejects if the stdin stream emits an "error" event.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const chunks = [];
    // Resolving twice is harmless: a promise settles only once.
    const finish = () => resolve(chunks.join(""));
    // Let the stream do the decoding: calling toString() on each Buffer chunk
    // corrupts multi-byte characters that straddle a chunk boundary.
    process.stdin.setEncoding("utf8");
    process.stdin.on("data", (chunk) => chunks.push(chunk));
    process.stdin.once("end", finish);
    // "close" stays as a fallback for a stream torn down without "end".
    process.stdin.once("close", finish);
    process.stdin.once("error", reject);
    process.stdin.resume();
  });
}