experimental

This commit is contained in:
Cat /dev/Nulo 2022-11-25 18:04:00 -03:00
commit c3f8ed8ee4
5 changed files with 134 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
zig-cache
zig-out

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "rem"]
path = rem
url = https://github.com/chwayne/rem

40
build.zig Normal file
View file

@ -0,0 +1,40 @@
const std = @import("std");
/// Build script entry point.
///
/// Declares the `site-analyzer` executable (installed by default), a `run`
/// step that executes it with any arguments given after `--`, and a `test`
/// step that builds the unit tests found in `src/main.zig`.
pub fn build(b: *std.build.Builder) void {
    // The `rem` package lives in the ./rem git submodule and is imported by
    // the sources as `@import("rem")`.
    const rem_pkg = std.build.Pkg{
        .name = "rem",
        .source = .{ .path = "./rem/rem.zig" },
    };

    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});

    // Standard release options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
    const mode = b.standardReleaseOptions();

    const exe = b.addExecutable("site-analyzer", "src/main.zig");
    exe.setTarget(target);
    exe.setBuildMode(mode);
    exe.addPackage(rem_pkg);
    exe.install();

    // `zig build run -- <args>`: install first, then run the installed
    // artifact, forwarding any extra command-line arguments.
    const run_cmd = exe.run();
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }
    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    const exe_tests = b.addTest("src/main.zig");
    exe_tests.setTarget(target);
    exe_tests.setBuildMode(mode);
    // src/main.zig imports "rem"; the test artifact needs the same package
    // as the executable or any test touching `rem` will not compile.
    exe_tests.addPackage(rem_pkg);
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&exe_tests.step);
}

1
rem Submodule

@ -0,0 +1 @@
Subproject commit 53d2307030c9b9ce3501b007de86b49b0838d3ae

87
src/main.zig Normal file
View file

@ -0,0 +1,87 @@
const std = @import("std");
const rem = @import("rem");
/// Decodes the UTF-8 bytes in `string` into a newly allocated slice of code
/// points.
///
/// Fails if `std.unicode.Utf8View.init` rejects the input or if growing the
/// result list fails. On success the caller owns the returned slice and must
/// free it with `allocator`.
fn utf8DecodeString(allocator: std.mem.Allocator, string: []const u8) ![]u21 {
    var codepoints = std.ArrayList(u21).init(allocator);
    // Only release the partial result when we bail out with an error.
    errdefer codepoints.deinit();

    const view = try std.unicode.Utf8View.init(string);
    var it = view.iterator();
    while (true) {
        const cp = it.nextCodepoint() orelse break;
        try codepoints.append(cp);
    }

    return codepoints.toOwnedSlice();
}
/// Reads an HTML document from stdin, parses it with `rem`, and runs `check`
/// on the document's root element.
///
/// Returns exit code 0 on success, or 1 when the parser reported an error or
/// the parsed document has no root element. Failures from reading stdin,
/// UTF-8 decoding, allocation, or the parser itself are propagated as errors.
pub fn main() !u8 {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Upper bound, in bytes, handed to `readToEndAlloc` for the stdin input.
    const max_input_bytes = 1024 * 1024;
    const string = try std.io.getStdIn().readToEndAlloc(allocator, max_input_bytes);
    // Both buffers below are owned by us. The defers run in reverse order, so
    // they are freed only after the parser and the DOM have been torn down.
    defer allocator.free(string);

    // The string must be decoded before it can be passed to the parser.
    // const input = &rem.util.utf8DecodeStringComptime(string);
    const input = try utf8DecodeString(allocator, string);
    defer allocator.free(input);

    // Create the DOM in which the parsed Document will be created.
    var dom = rem.dom.Dom{ .allocator = allocator };
    defer dom.deinit();

    var parser = try rem.Parser.init(&dom, input, allocator, .abort, false);
    defer parser.deinit();
    try parser.run();

    const errors = parser.errors();
    if (errors.len > 0) {
        // Only the first recorded error is reported.
        std.log.err("A parsing error occured!\n{s}\n", .{@tagName(errors[0])});
        return 1;
    }

    const document = parser.getDocument();
    // Debug aid: dump the parsed tree to stdout.
    // const writer = std.io.getStdOut().writer();
    // try rem.util.printDocument(writer, document, &dom, allocator);

    // Report a missing root element instead of tripping a null unwrap.
    const root = document.element orelse {
        std.log.err("document has no root element", .{});
        return 1;
    };
    check(root);
    return 0;
}
// Short aliases for standard-library prefix tests.
// `startsWith` is not referenced by the code visible in this file.
const startsWith = std.mem.startsWith;
// Alias of `std.ascii.startsWithIgnoreCase`, used by the URL prefix checks in this file.
const startsWithIgnoreCase = std.ascii.startsWithIgnoreCase;
/// Reports whether `url` starts with "//", "http://" or "https://", using
/// `startsWithIgnoreCase` for the comparison.
///
/// Note that, despite the name, plain "http://" and "//" prefixes match too.
fn isHttps(url: []const u8) bool {
    const prefixes = [_][]const u8{ "//", "http://", "https://" };
    for (prefixes) |prefix| {
        if (startsWithIgnoreCase(url, prefix)) return true;
    }
    return false;
}
/// Reports whether `url` begins with a "/" character.
///
/// An empty string yields `false`. A "//" prefix also yields `true` here.
fn isAbsolute(url: []const u8) bool {
    if (url.len == 0) return false;
    return url[0] == '/';
}
/// Logs an error when an `img` element carries no `alt` attribute.
fn checkImage(element: *const rem.dom.Element) void {
    const has_alt = element.attributes.getEntry("alt") != null;
    if (!has_alt) {
        std.log.err("img with no alt text", .{});
    }
}

/// Logs a warning for an `a` element whose `href` is missing, matches
/// `isHttps`, or (failing that) matches `isAbsolute`.
fn checkLink(element: *const rem.dom.Element) void {
    const entry = element.attributes.getEntry("href") orelse {
        std.log.warn("link with no href", .{});
        return;
    };

    const target = entry.value_ptr.*;
    if (isHttps(target)) {
        std.log.warn("link to http/s: {s}", .{target});
        return;
    }
    if (isAbsolute(target)) {
        std.log.warn("absolute link: {s}", .{target});
    }
}

/// Inspects a single element and reports findings through `std.log`.
///
/// Only `img` and `a` elements are looked at; every other element type is
/// ignored. Child traversal is currently disabled (see the sketch at the end
/// of the body), so nothing below `element` is visited.
fn check(element: *const rem.dom.Element) void {
    // std.log.info("{any}", .{element.element_type});
    switch (element.element_type) {
        .html_img => checkImage(element),
        .html_a => checkLink(element),
        else => {},
    }

    // Disabled recursion into children, kept as a sketch:
    // for (element.children.items) |child| {
    //     switch (child) {
    //         .element => |el| check(el),
    //         .cdata => |cdata| std.log.info("cdata: {s}", .{cdata.data.items}),
    //     }
    // }
}