Compare commits

..

No commits in common. "d20d8fc238dd66c8eedae3a5e35826a7afa19f4d" and "141c2a02bf3fcadc35223d9c2735fcfc3a5999f9" have entirely different histories.

6 changed files with 23 additions and 153 deletions

2
.gitignore vendored
View file

@@ -4,5 +4,3 @@ node_modules/
# Contains publish secrets # Contains publish secrets
.npmrc .npmrc
src/**.js

View file

@@ -8,8 +8,7 @@ pipeline:
- apk add --quiet curl nodejs npm git git-lfs tar zstd - apk add --quiet curl nodejs npm git git-lfs tar zstd
- curl -fsSL "https://github.com/pnpm/pnpm/releases/latest/download/pnpm-linuxstatic-x64" -o /bin/pnpm; chmod +x /bin/pnpm - curl -fsSL "https://github.com/pnpm/pnpm/releases/latest/download/pnpm-linuxstatic-x64" -o /bin/pnpm; chmod +x /bin/pnpm
- pnpm set registry http://npm.proxy.coso - pnpm set registry http://npm.proxy.coso
- pnpm install --silent - pnpm install --prod --silent
- pnpm tsc
- git clone https://gitea.nulo.in/Nulo/site-analyzer-demos - git clone https://gitea.nulo.in/Nulo/site-analyzer-demos
- cd site-analyzer-demos - cd site-analyzer-demos
@@ -17,7 +16,7 @@ pipeline:
- tar xf la-periferica.com.ar.tar.zst - tar xf la-periferica.com.ar.tar.zst
- tar xf distributed.press.tar.zst - tar xf distributed.press.tar.zst
- time node ../src nulo.in/ - time node .. nulo.in/
- time node ../src la-periferica.com.ar/ - time node .. la-periferica.com.ar/
- time node ../src distributed.press/ - time node .. distributed.press/

View file

@@ -1,25 +1,12 @@
import { Parser } from "htmlparser2"; import { Parser } from "htmlparser2";
import { DomHandler, Element } from "domhandler"; import { DomHandler } from "domhandler";
import { readdir, readFile } from "fs/promises"; import { readdir, readFile } from "fs/promises";
import { join } from "path"; import { join } from "path";
const { argv } = process; const { argv } = process;
const dirPath = argv[2] || "."; const dirPath = argv[2] || ".";
interface Thing {
type:
| "link-http"
| "link-absolute"
| "link-no-href"
| "media-http"
| "media-absolute"
| "media-no-src";
description: string;
}
interface Report {
things: Thing[];
}
function recurseElement(report: Report, rawHtml: string, el: Element) { function recurseElement(report, rawHtml, el) {
if (el.name === "a") { if (el.name === "a") {
if (el.attribs.href !== undefined) { if (el.attribs.href !== undefined) {
if (isHttp(el.attribs.href)) { if (isHttp(el.attribs.href)) {
@@ -40,9 +27,9 @@ function recurseElement(report: Report, rawHtml: string, el: Element) {
}); });
} }
} }
if (["audio", "video", "img", "source"].includes(el.name)) { if (["audio", "video", "img"].includes(el.name)) {
if (el.attribs.src) { if (el.attribs.src) {
checkUrl(report, rawHtml, el, el.attribs.src); checkUrl(report, rawHtml, "media", el, el.attribs.src);
} else { } else {
report.things.push({ report.things.push({
type: "media-no-src", type: "media-no-src",
@@ -58,32 +45,32 @@ function recurseElement(report: Report, rawHtml: string, el: Element) {
} }
} }
function checkUrl(report: Report, rawHtml: string, el: Element, url: string) { function checkUrl(report, rawHtml, type, el, url) {
if (isHttp(url)) { if (isHttp(url)) {
report.things.push({ report.things.push({
type: "media-http", type: type + "-http",
description: getHtml(rawHtml, el), description: getHtml(rawHtml, el),
}); });
} else if (isAbsolute(url)) { } else if (isAbsolute(url)) {
report.things.push({ report.things.push({
type: "media-absolute", type: type + "-absolute",
description: getHtml(rawHtml, el), description: getHtml(rawHtml, el),
}); });
} }
} }
function isHttp(url: string) { function isHttp(url) {
const r = /^(https?:\/\/|\/\/)/; const r = /^(https?:\/\/|\/\/)/;
return r.test(url); return r.test(url);
} }
function isAbsolute(url: string) { function isAbsolute(url) {
return url.startsWith("/"); return url.startsWith("/");
} }
function getHtml(rawHtml: string, el: Element) { function getHtml(rawHtml, el) {
return rawHtml.slice(el.startIndex!, el.endIndex!); return rawHtml.slice(el.startIndex, el.endIndex);
} }
function getText(el: Element) { function getText(el) {
let text = ""; let text = "";
for (const child of el.children) { for (const child of el.children) {
if (child.type === "text") text += child.data.trim(); if (child.type === "text") text += child.data.trim();
@@ -92,15 +79,17 @@ function getText(el: Element) {
return text; return text;
} }
function processFile(content: string): Promise<Report> { function processFile(content) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const handler = new DomHandler( const handler = new DomHandler(
(error, dom) => { (error, dom) => {
if (error) { if (error) {
reject(error); reject(error);
} else { } else {
let report: Report = { let report = {
things: [], things: [],
warnings: [],
infos: [],
}; };
for (const el of dom) { for (const el of dom) {
if (el.type === "tag") { if (el.type === "tag") {
@@ -117,11 +106,8 @@ function processFile(content: string): Promise<Report> {
}); });
} }
interface Reports { let reports = {};
[key: string]: Report; async function recurseDirectory(reports, path) {
}
let reports: Reports = {};
async function recurseDirectory(reports: Reports, path: string) {
const dir = await readdir(path, { withFileTypes: true }); const dir = await readdir(path, { withFileTypes: true });
for (const file of dir) { for (const file of dir) {
const filePath = join(path, file.name); const filePath = join(path, file.name);

View file

@@ -16,7 +16,6 @@
}, },
"devDependencies": { "devDependencies": {
"@types/htmlparser2": "^3.10.3", "@types/htmlparser2": "^3.10.3",
"@types/node": "^18.11.9", "@types/node": "^18.11.9"
"typescript": "^4.9.3"
} }
} }

View file

@@ -5,7 +5,6 @@ specifiers:
'@types/node': ^18.11.9 '@types/node': ^18.11.9
domhandler: ^5.0.3 domhandler: ^5.0.3
htmlparser2: ^8.0.1 htmlparser2: ^8.0.1
typescript: ^4.9.3
dependencies: dependencies:
domhandler: 5.0.3 domhandler: 5.0.3
@@ -14,7 +13,6 @@ dependencies:
devDependencies: devDependencies:
'@types/htmlparser2': 3.10.3 '@types/htmlparser2': 3.10.3
'@types/node': 18.11.9 '@types/node': 18.11.9
typescript: 4.9.3
packages: packages:
@@ -86,9 +84,3 @@ packages:
domutils: 3.0.1 domutils: 3.0.1
entities: 4.4.0 entities: 4.4.0
dev: false dev: false
/typescript/4.9.3:
resolution: {integrity: sha512-CIfGzTelbKNEnLpLdGFgdyKhG23CKdKgQPOBc+OUNrkJ2vr+KSzsSV5kq5iWhEQbok+quxgGzrAtGWCyU7tHnA==}
engines: {node: '>=4.2.0'}
hasBin: true
dev: true

View file

@@ -1,104 +0,0 @@
{
"include": ["src/**/*"],
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "es2017" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "es2022" /* Specify what module code is generated. */,
// "rootDir": "./", /* Specify the root folder within your source files. */
"moduleResolution": "node" /* Specify how TypeScript looks up a file from a given module specifier. */,
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
// "outDir": "./", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */,
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true /* Ensure that casing is correct in imports. */,
/* Type Checking */
"strict": true /* Enable all strict type-checking options. */,
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}