experimental
This commit is contained in:
commit
c3f8ed8ee4
5 changed files with 134 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
zig-cache
|
||||||
|
zig-out
|
||||||
|
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[submodule "rem"]
|
||||||
|
path = rem
|
||||||
|
url = https://github.com/chwayne/rem
|
40
build.zig
Normal file
40
build.zig
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Build script entry point.
///
/// Declares the `site-analyzer` executable built from `src/main.zig`, plus two
/// extra steps: `run` (installs, then runs the executable, forwarding any
/// arguments given to `zig build run -- ...`) and `test` (unit tests of
/// `src/main.zig`). Both the executable and the tests get the `rem` package.
pub fn build(b: *std.build.Builder) void {
    // `rem` is consumed straight from the source tree (see the `rem`
    // submodule) and exposed to the code as `@import("rem")`.
    const rem_pkg = std.build.Pkg{
        .name = "rem",
        .source = .{ .path = "./rem/rem.zig" },
    };

    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});

    // Standard release options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
    const mode = b.standardReleaseOptions();

    const exe = b.addExecutable("site-analyzer", "src/main.zig");
    exe.setTarget(target);
    exe.setBuildMode(mode);
    exe.addPackage(rem_pkg);
    exe.install();

    const run_cmd = exe.run();
    // Running always goes through the install step first.
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    const exe_tests = b.addTest("src/main.zig");
    exe_tests.setTarget(target);
    exe_tests.setBuildMode(mode);
    // src/main.zig imports "rem", so the test build needs the same package
    // as the executable or that import cannot be resolved.
    exe_tests.addPackage(rem_pkg);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&exe_tests.step);
}
|
1
rem
Submodule
1
rem
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 53d2307030c9b9ce3501b007de86b49b0838d3ae
|
87
src/main.zig
Normal file
87
src/main.zig
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const rem = @import("rem");
|
||||||
|
|
||||||
|
/// Decodes the UTF-8 bytes in `string` into a newly allocated slice of code
/// points.
///
/// The caller owns the returned slice and must free it with `allocator`.
/// Fails if `string` is rejected by `std.unicode.Utf8View.init` or if an
/// allocation fails; nothing is leaked on the error path.
fn utf8DecodeString(allocator: std.mem.Allocator, string: []const u8) ![]u21 {
    // Validate up front; the iterator below then never sees malformed input.
    const view = try std.unicode.Utf8View.init(string);

    var codepoints = std.ArrayList(u21).init(allocator);
    errdefer codepoints.deinit();

    var it = view.iterator();
    while (it.nextCodepoint()) |cp| {
        try codepoints.append(cp);
    }

    return codepoints.toOwnedSlice();
}
|
||||||
|
|
||||||
|
/// Reads an HTML document from stdin, parses it with `rem`, and runs `check`
/// on the document's root element.
///
/// Returns exit code 1 if the parser recorded at least one error, 0
/// otherwise. Failures while reading stdin, decoding UTF-8, or setting up and
/// running the parser are propagated as errors.
pub fn main() !u8 {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Input is capped at 1 MiB (the limit passed to readToEndAlloc).
    const string = try std.io.getStdIn().readToEndAlloc(allocator, 1024 * 1024);
    // Defers run in reverse order, so both buffers are released only after
    // the parser and the DOM (declared below) have been torn down.
    defer allocator.free(string);

    // The string must be decoded before it can be passed to the parser.
    // const input = &rem.util.utf8DecodeStringComptime(string);
    const input = try utf8DecodeString(allocator, string);
    defer allocator.free(input);

    // Create the DOM in which the parsed Document will be created.
    var dom = rem.dom.Dom{ .allocator = allocator };
    defer dom.deinit();

    var parser = try rem.Parser.init(&dom, input, allocator, .abort, false);
    defer parser.deinit();
    try parser.run();

    const errors = parser.errors();
    if (errors.len > 0) {
        // Only the first recorded error is reported.
        std.log.err("A parsing error occured!\n{s}\n", .{@tagName(errors[0])});
        return 1;
    }

    // const writer = std.io.getStdOut().writer();
    const document = parser.getDocument();

    // try rem.util.printDocument(writer, document, &dom, allocator);
    if (document.element) |root| {
        check(root);
    } else {
        // Avoid an unchecked unwrap if the document has no root element.
        std.log.warn("document has no root element; nothing to check", .{});
    }
    return 0;
}
|
||||||
|
const startsWith = std.mem.startsWith;
|
||||||
|
const startsWithIgnoreCase = std.ascii.startsWithIgnoreCase;
|
||||||
|
|
||||||
|
/// Reports whether `url` starts with `//`, `http://`, or `https://`
/// (compared ASCII case-insensitively).
///
/// Note that, despite the name, plain `http://` and protocol-relative `//`
/// URLs are matched as well.
fn isHttps(url: []const u8) bool {
    const prefixes = [_][]const u8{ "//", "http://", "https://" };
    for (prefixes) |prefix| {
        if (startsWithIgnoreCase(url, prefix)) return true;
    }
    return false;
}
|
||||||
|
/// Reports whether `url` begins with a `/`.
///
/// An empty `url` yields false. A `//` prefix also yields true here.
fn isAbsolute(url: []const u8) bool {
    if (url.len == 0) return false;
    return url[0] == '/';
}
|
||||||
|
|
||||||
|
/// Logs diagnostics for a single element:
///   - `img` without an `alt` attribute is logged as an error;
///   - `a` without an `href` attribute is logged as a warning;
///   - `a` whose `href` satisfies `isHttps`, or otherwise `isAbsolute`, is
///     logged as a warning.
/// Every other element type is ignored.
///
/// Only `element` itself is inspected: the descent into children below is
/// still commented out.
fn check(element: *const rem.dom.Element) void {
    // std.log.info("{any}", .{element.element_type});
    switch (element.element_type) {
        .html_a => if (element.attributes.getEntry("href")) |attr| {
            const target = attr.value_ptr.*;
            // isHttps is tested first, so a "//" link is reported as
            // http/s rather than as an absolute link.
            if (isHttps(target)) {
                std.log.warn("link to http/s: {s}", .{target});
            } else if (isAbsolute(target)) {
                std.log.warn("absolute link: {s}", .{target});
            }
        } else {
            std.log.warn("link with no href", .{});
        },

        .html_img => {
            const alt = element.attributes.getEntry("alt");
            if (alt == null) {
                std.log.err("img with no alt text", .{});
            }
        },

        else => {},
    }
    // for (element.children.items) |child| {
    //     switch (child) {
    //         .element => |el| check(el),
    //         .cdata => |cdata| std.log.info("cdata: {s}", .{cdata.data.items}),
    //     }
    // }
}
|
Loading…
Reference in a new issue