Cleanup
This commit is contained in:
parent
ad0ef24257
commit
7f4649df37
1 changed files with 35 additions and 36 deletions
71
index.js
71
index.js
|
@ -6,43 +6,9 @@ const noop = () => {};
|
|||
// Logging sink used by checkUrl. Swap in the commented-out `noop` binding
// below to silence all output.
// const log = noop;
|
||||
const { log } = console;
|
||||
|
||||
/**
 * Read everything available on standard input and return it as one string.
 *
 * Decoding is delegated to the stream (`setEncoding`), so a multi-byte UTF-8
 * character that happens to be split across two chunks is reassembled instead
 * of being turned into replacement characters by per-chunk `toString()`.
 *
 * @returns {Promise<string>} Resolves with the complete input once stdin
 *   signals end-of-input; rejects if the stream emits an error.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const input = process.stdin;
    const pieces = [];

    input.setEncoding("utf8");
    input.on("data", (piece) => pieces.push(piece));
    // "end" fires as soon as there is no more data to consume.
    input.once("end", () => resolve(pieces.join("")));
    // Surface stream failures instead of leaving the promise pending forever.
    input.once("error", reject);
    input.resume();
  });
}
|
||||
// Read all of standard input up front. The top-level `await` means this file
// has to be loaded as an ES module.
const stdin = await readStdin();
|
||||
// `rawHtml` is the name the parsing code reads; it is an alias of `stdin`.
const rawHtml = stdin;
|
||||
|
||||
/**
 * Tell whether a URL starts with `http://`, `https://`, or the
 * protocol-relative prefix `//`.
 *
 * @param {string} url - The attribute value to inspect.
 * @returns {boolean} `true` when the URL begins with one of those prefixes.
 */
function isHttp(url) {
  // URI schemes are case-insensitive (RFC 3986 section 3.1), so `HTTP://`
  // and `Https://` must be recognised as well; hence the `i` flag.
  return /^(?:https?:)?\/\//i.test(url);
}
|
||||
/**
 * Tell whether a URL is root-relative, i.e. its first character is `/`.
 *
 * @param {string} url - The attribute value to inspect.
 * @returns {boolean} `true` when the URL begins with a slash.
 */
function isAbsolute(url) {
  const firstChar = url.charAt(0);
  return firstChar === "/";
}
|
||||
|
||||
/**
 * Collect the text found under an element.
 *
 * Walks `el.children` in order: text nodes contribute their trimmed `data`,
 * tag nodes contribute their own collected text (recursively), and every
 * other node type contributes nothing. Pieces are joined without separators.
 *
 * @param {{children: Iterable<object>}} el - Node whose children are scanned.
 * @returns {string} The concatenated, per-node-trimmed text.
 */
function getHtml(el) {
  return [...el.children].reduce((collected, node) => {
    if (node.type === "text") {
      return collected + node.data.trim();
    }
    if (node.type === "tag") {
      return collected + getHtml(node);
    }
    return collected;
  }, "");
}
|
||||
|
||||
/**
 * Log an element whose URL is either HTTP(S)/protocol-relative or
 * root-relative. Any other URL produces no output.
 *
 * @param {string} type - Label interpolated into the log line.
 * @param {object} el - Element whose text is logged alongside the label.
 * @param {string} url - The URL to classify.
 */
function checkUrl(type, el, url) {
  // The HTTP check runs first, so `//host/...` is reported as HTTP/S even
  // though it also starts with a slash.
  if (isHttp(url)) {
    log(`HTTP/S ${type}:`, getHtml(el));
    return;
  }

  if (!isAbsolute(url)) {
    return;
  }

  log(`Absolute ${type}:`, getHtml(el));
}
|
||||
|
||||
function recursive(el) {
|
||||
if (el.name === "a") {
|
||||
if (el.attribs.href) {
|
||||
|
@ -66,12 +32,37 @@ function recursive(el) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Report an element when its URL is HTTP(S)/protocol-relative or starts at
 * the site root; stay silent for every other URL.
 *
 * @param {string} type - Label interpolated into the log line.
 * @param {object} el - Element whose text accompanies the label.
 * @param {string} url - The URL to classify.
 */
function checkUrl(type, el, url) {
  const external = isHttp(url);

  // Only consult the root-relative check when the HTTP check did not match,
  // so `//host/...` is always reported as HTTP/S.
  if (!external && !isAbsolute(url)) {
    return;
  }

  if (external) {
    log(`HTTP/S ${type}:`, getHtml(el));
  } else {
    log(`Absolute ${type}:`, getHtml(el));
  }
}
|
||||
|
||||
/**
 * Tell whether a URL starts with `http://`, `https://`, or the
 * protocol-relative prefix `//`.
 *
 * @param {string} url - The attribute value to inspect.
 * @returns {boolean} `true` when the URL begins with one of those prefixes.
 */
function isHttp(url) {
  // The `i` flag matters: URI schemes are case-insensitive (RFC 3986
  // section 3.1), so `HTTP://` must match just like `http://`.
  const externalPrefix = /^(https?:\/\/|\/\/)/i;
  return externalPrefix.test(url);
}
|
||||
/**
 * Tell whether a URL is root-relative, i.e. it begins with `/`.
 *
 * @param {string} url - The attribute value to inspect.
 * @returns {boolean} `true` when the first character is a slash.
 */
function isAbsolute(url) {
  return url.substring(0, 1) === "/";
}
|
||||
|
||||
/**
 * Gather the text beneath an element into a single string.
 *
 * Children are visited in order. A text node yields its trimmed `data`, a tag
 * node yields whatever this function returns for it, and any other node type
 * yields nothing. The pieces are joined with no separator.
 *
 * @param {{children: Iterable<object>}} el - Node whose children are scanned.
 * @returns {string} The concatenated, per-node-trimmed text.
 */
function getHtml(el) {
  const pieces = [];
  for (const { type, ...node } of el.children) {
    switch (type) {
      case "text":
        pieces.push(node.data.trim());
        break;
      case "tag":
        pieces.push(getHtml(node));
        break;
      default:
        break;
    }
  }
  return pieces.join("");
}
|
||||
|
||||
const handler = new DomHandler(
|
||||
(error, dom) => {
|
||||
if (error) {
|
||||
// TODO: Handle error
|
||||
} else {
|
||||
// for (let i = 0; i < 1000; i++) {
|
||||
console.time();
|
||||
for (const el of dom) {
|
||||
if (el.type === "tag") {
|
||||
|
@ -79,10 +70,18 @@ const handler = new DomHandler(
|
|||
}
|
||||
}
|
||||
console.timeEnd();
|
||||
// }
|
||||
}
|
||||
},
|
||||
{ withEndIndices: true, withStartIndices: true }
|
||||
);
|
||||
// Build the parser around `handler` and hand it the whole input in one call.
// NOTE(review): `Parser` and `DomHandler` are imported outside this view —
// presumably from htmlparser2; confirm against the file's import block.
const parser = new Parser(handler);
|
||||
parser.parseComplete(rawHtml);
|
||||
|
||||
/**
 * Read everything available on standard input and return it as one string.
 *
 * The stream does the UTF-8 decoding itself (`setEncoding`), so a multi-byte
 * character split across two chunks is reassembled rather than being mangled
 * by decoding each chunk on its own.
 *
 * @returns {Promise<string>} Resolves with the complete input once stdin
 *   signals end-of-input; rejects if the stream emits an error.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const source = process.stdin;
    let text = "";

    source.setEncoding("utf8");
    source.on("data", (chunk) => {
      text += chunk;
    });
    // Resolve on "end" (no more data) rather than waiting for "close".
    source.once("end", () => resolve(text));
    // Propagate stream failures so callers are not left waiting forever.
    source.once("error", reject);
    source.resume();
  });
}
|
||||
|
|
Loading…
Reference in a new issue