experimental
This commit is contained in:
commit
c3f8ed8ee4
5 changed files with 134 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
zig-cache
|
||||
zig-out
|
||||
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
[submodule "rem"]
|
||||
path = rem
|
||||
url = https://github.com/chwayne/rem
|
40
build.zig
Normal file
40
build.zig
Normal file
|
@ -0,0 +1,40 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Build script entry point: declares the `site-analyzer` executable together
/// with the `run` and `test` steps.
///
/// Both the executable and the unit-test artifact are compiled from
/// `src/main.zig`, which imports the `rem` package, so both artifacts get the
/// package attached.
pub fn build(b: *std.build.Builder) void {
    // The `rem` package is resolved from the `rem` directory next to this
    // build script.
    const rem_pkg = std.build.Pkg{
        .name = "rem",
        .source = .{ .path = "./rem/rem.zig" },
    };

    // Standard target options allow the person running `zig build` to choose
    // what target to build for. The defaults are not overridden here, which
    // means any target is allowed and the default is native.
    const target = b.standardTargetOptions(.{});

    // Standard release options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
    const mode = b.standardReleaseOptions();

    const exe = b.addExecutable("site-analyzer", "src/main.zig");
    exe.setTarget(target);
    exe.setBuildMode(mode);
    exe.addPackage(rem_pkg);
    exe.install();

    // `zig build run -- <args>` installs first, then runs the executable and
    // forwards any extra command-line arguments to it.
    const run_cmd = exe.run();
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    const exe_tests = b.addTest("src/main.zig");
    exe_tests.setTarget(target);
    exe_tests.setBuildMode(mode);
    // src/main.zig does `@import("rem")`; without the package the test build
    // cannot resolve that import.
    exe_tests.addPackage(rem_pkg);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&exe_tests.step);
}
|
1
rem
Submodule
1
rem
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 53d2307030c9b9ce3501b007de86b49b0838d3ae
|
87
src/main.zig
Normal file
87
src/main.zig
Normal file
|
@ -0,0 +1,87 @@
|
|||
const std = @import("std");
|
||||
const rem = @import("rem");
|
||||
|
||||
/// Decodes the UTF-8 bytes of `string` into a freshly allocated slice of
/// Unicode code points.
///
/// The returned slice is allocated with `allocator`; the caller owns it and
/// must free it. Fails when `string` is not valid UTF-8 or when an allocation
/// fails; nothing is leaked on the error path.
fn utf8DecodeString(allocator: std.mem.Allocator, string: []const u8) ![]u21 {
    // Validation happens here, before any code point is produced.
    const view = try std.unicode.Utf8View.init(string);
    var codepoints = view.iterator();

    var decoded = std.ArrayList(u21).init(allocator);
    errdefer decoded.deinit();

    while (true) {
        const codepoint = codepoints.nextCodepoint() orelse break;
        try decoded.append(codepoint);
    }

    return decoded.toOwnedSlice();
}
|
||||
|
||||
/// Reads an HTML document from stdin, parses it with `rem`, and runs `check`
/// on the document's root element.
///
/// Returns exit code 0 on success and 1 when the parser recorded at least one
/// parse error. I/O, allocation and UTF-8 decoding failures are propagated as
/// Zig errors.
pub fn main() !u8 {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Reads all of stdin, with a size limit of 1 MiB.
    const string = try std.io.getStdIn().readToEndAlloc(allocator, 1024 * 1024);
    // Both buffers below were previously leaked. Defers run in reverse order,
    // so they are released only after the parser and DOM have been torn down.
    defer allocator.free(string);

    // The string must be decoded before it can be passed to the parser.
    const input = try utf8DecodeString(allocator, string);
    defer allocator.free(input);

    // Create the DOM in which the parsed Document will be created.
    var dom = rem.dom.Dom{ .allocator = allocator };
    defer dom.deinit();

    var parser = try rem.Parser.init(&dom, input, allocator, .abort, false);
    defer parser.deinit();
    try parser.run();

    const errors = parser.errors();
    if (errors.len > 0) {
        std.log.err("A parsing error occured!\n{s}\n", .{@tagName(errors[0])});
        return 1;
    }

    const document = parser.getDocument();

    // NOTE(review): `.?` assumes a successfully parsed document always has a
    // root element — confirm against rem.
    check(document.element.?);
    return 0;
}
|
||||
const startsWith = std.mem.startsWith;
|
||||
const startsWithIgnoreCase = std.ascii.startsWithIgnoreCase;
|
||||
|
||||
/// Reports whether `url` starts with `//`, `http://` or `https://`, compared
/// ASCII case-insensitively.
///
/// Despite the name, plain `http://` and protocol-relative `//` URLs match
/// as well.
fn isHttps(url: []const u8) bool {
    const prefixes = [_][]const u8{ "//", "http://", "https://" };
    for (prefixes) |prefix| {
        if (std.ascii.startsWithIgnoreCase(url, prefix)) return true;
    }
    return false;
}
|
||||
/// Reports whether `url` begins with a `/`.
///
/// A protocol-relative URL such as `//host/path` also begins with `/` and so
/// returns true here.
fn isAbsolute(url: []const u8) bool {
    if (url.len == 0) return false;
    return url[0] == '/';
}
|
||||
|
||||
/// Inspects a single element and logs findings:
///   - `img` without an `alt` attribute is logged as an error;
///   - `a` whose `href` matches `isHttps` is logged as a warning, otherwise
///     one whose `href` matches `isAbsolute` is logged as a warning;
///   - `a` without an `href` attribute is logged as a warning.
/// All other element types are ignored.
///
/// Only `element` itself is examined; the recursion into its children is
/// currently disabled (see the commented-out loop at the end).
fn check(element: *const rem.dom.Element) void {
    switch (element.element_type) {
        .html_img => {
            const alt = element.attributes.getEntry("alt");
            if (alt == null) {
                std.log.err("img with no alt text", .{});
            }
        },

        .html_a => {
            const maybe_href = element.attributes.getEntry("href");
            if (maybe_href) |entry| {
                const href = entry.value_ptr.*;
                // `isHttps` is tested first, so a `//host` URL is reported
                // there and not as an absolute link.
                const is_external = isHttps(href);
                if (is_external) {
                    std.log.warn("link to http/s: {s}", .{href});
                } else if (isAbsolute(href)) {
                    std.log.warn("absolute link: {s}", .{href});
                }
            } else {
                std.log.warn("link with no href", .{});
            }
        },

        else => {},
    }

    // NOTE(review): child traversal is disabled, so descendants are never
    // checked — confirm whether this is intentional before re-enabling.
    // for (element.children.items) |child| {
    //     switch (child) {
    //         .element => |el| check(el),
    //         .cdata => |cdata| std.log.info("cdata: {s}", .{cdata.data.items}),
    //     }
    // }
}
|
Loading…
Reference in a new issue