commit c3f8ed8ee4e7732ff2d1960ee728f917a81313a3
Author: Nulo
Date:   Fri Nov 25 18:04:00 2022 -0300

    experimental

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b8d4465
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+zig-cache
+zig-out
+
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..7707d08
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "rem"]
+	path = rem
+	url = https://github.com/chwayne/rem
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..5c7c203
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,46 @@
+const std = @import("std");
+
+/// Declares the build graph for `site-analyzer`: the executable, a `run` step
+/// that forwards command-line arguments to it, and a `test` step for the unit
+/// tests in `src/main.zig`.
+pub fn build(b: *std.build.Builder) void {
+    // The HTML parser is vendored as the `rem` git submodule.
+    const rem_pkg = std.build.Pkg{
+        .name = "rem",
+        .source = .{ .path = "./rem/rem.zig" },
+    };
+
+    // Standard target options allow the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard release options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
+    const mode = b.standardReleaseOptions();
+
+    const exe = b.addExecutable("site-analyzer", "src/main.zig");
+    exe.setTarget(target);
+    exe.setBuildMode(mode);
+    exe.addPackage(rem_pkg);
+    exe.install();
+
+    const run_cmd = exe.run();
+    run_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    const exe_tests = b.addTest("src/main.zig");
+    exe_tests.setTarget(target);
+    exe_tests.setBuildMode(mode);
+    // `src/main.zig` imports "rem", so the test build needs the package too.
+    exe_tests.addPackage(rem_pkg);
+
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&exe_tests.step);
+}
diff --git a/rem b/rem
new file mode 160000
index 0000000..53d2307
--- /dev/null
+++ b/rem
@@ -0,0 +1 @@
+Subproject commit 53d2307030c9b9ce3501b007de86b49b0838d3ae
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..101c469
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,105 @@
+const std = @import("std");
+const rem = @import("rem");
+
+/// Decodes the UTF-8 bytes in `string` into a slice of Unicode code points.
+/// Returns an error if `string` is not valid UTF-8 or if allocation fails.
+/// The caller owns the returned slice and must free it with `allocator`.
+fn utf8DecodeString(allocator: std.mem.Allocator, string: []const u8) ![]u21 {
+    var list = std.ArrayList(u21).init(allocator);
+    errdefer list.deinit();
+
+    var decoded_it = (try std.unicode.Utf8View.init(string)).iterator();
+    while (decoded_it.nextCodepoint()) |codepoint| {
+        try list.append(codepoint);
+    }
+    return list.toOwnedSlice();
+}
+
+/// Reads a document from stdin (capped at 1 MiB), parses it with `rem` and
+/// runs `check` on the document's root element. Returns 1 if the parser
+/// recorded any error, 0 otherwise; other failures propagate as errors.
+pub fn main() !u8 {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = gpa.allocator();
+
+    const string = try std.io.getStdIn().readToEndAlloc(allocator, 1024 * 1024);
+    // Release the raw input on every exit path so `gpa.deinit()` sees no leak.
+    defer allocator.free(string);
+    // The string must be decoded before it can be passed to the parser.
+    // const input = &rem.util.utf8DecodeStringComptime(string);
+    const input = try utf8DecodeString(allocator, string);
+    // Declared before the parser so it is freed only after `parser.deinit()`.
+    defer allocator.free(input);
+
+    // Create the DOM in which the parsed Document will be created.
+    var dom = rem.dom.Dom{ .allocator = allocator };
+    defer dom.deinit();
+
+    var parser = try rem.Parser.init(&dom, input, allocator, .abort, false);
+    defer parser.deinit();
+    try parser.run();
+
+    const errors = parser.errors();
+    if (errors.len > 0) {
+        std.log.err("A parsing error occured!\n{s}\n", .{@tagName(errors[0])});
+        return 1;
+    }
+
+    // const writer = std.io.getStdOut().writer();
+    const document = parser.getDocument();
+
+    // try rem.util.printDocument(writer, document, &dom, allocator);
+    check(document.element.?);
+    return 0;
+}
+const startsWith = std.mem.startsWith;
+const startsWithIgnoreCase = std.ascii.startsWithIgnoreCase;
+
+/// Reports whether `url` starts with `//`, `http://` or `https://`
+/// (case-insensitive). Despite the name, plain `http://` and
+/// protocol-relative URLs match as well.
+fn isHttps(url: []const u8) bool {
+    return startsWithIgnoreCase(url, "//") or
+        startsWithIgnoreCase(url, "http://") or
+        startsWithIgnoreCase(url, "https://");
+}
+
+/// Reports whether `url` starts with `/`.
+fn isAbsolute(url: []const u8) bool {
+    return startsWithIgnoreCase(url, "/");
+}
+
+/// Logs findings for a single element: an `img` without an `alt` attribute,
+/// and an `a` whose `href` is missing, matches `isHttps`, or starts with `/`.
+/// Child elements are not visited; the traversal below is commented out.
+fn check(element: *const rem.dom.Element) void {
+    // std.log.info("{any}", .{element.element_type});
+    switch (element.element_type) {
+        .html_img => {
+            if (element.attributes.getEntry("alt") == null) {
+                std.log.err("img with no alt text", .{});
+            }
+        },
+        .html_a => {
+            if (element.attributes.getEntry("href")) |entry| {
+                const href = entry.value_ptr.*;
+                if (isHttps(href)) {
+                    std.log.warn("link to http/s: {s}", .{href});
+                } else if (isAbsolute(href)) {
+                    std.log.warn("absolute link: {s}", .{href});
+                }
+            } else {
+                std.log.warn("link with no href", .{});
+            }
+        },
+
+        else => {},
+    }
+    // for (element.children.items) |child| {
+    //     switch (child) {
+    //         .element => |el| check(el),
+    //         .cdata => |cdata| std.log.info("cdata: {s}", .{cdata.data.items}),
+    //     }
+    // }
+}