From b58c5ce8caaada52e63f4ceaec26c1972c93d931 Mon Sep 17 00:00:00 2001
From: Nulo <git@nulo.in>
Date: Sun, 12 Sep 2021 22:28:40 -0300
Subject: [PATCH] Initial commit

---
 .gitignore        |   3 +
 .gitmodules       |   3 +
 cli.zig           |  17 +++
 commonmark-spec   |   1 +
 html.zig          |  31 ++++
 main.zig          |   4 +
 parser.zig        | 374 ++++++++++++++++++++++++++++++++++++++++++++++
 readme.md         |  29 ++++
 readme_en.md      |  27 ++++
 run-spec-tests.sh |   5 +
 10 files changed, 494 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 100644 cli.zig
 create mode 160000 commonmark-spec
 create mode 100644 html.zig
 create mode 100644 main.zig
 create mode 100644 parser.zig
 create mode 100644 readme.md
 create mode 100644 readme_en.md
 create mode 100755 run-spec-tests.sh
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7305c87
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+cli
+zig-cache/
+zig-out/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..c3e6a18
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "commonmark-spec"]
+	path = commonmark-spec
+	url = https://github.com/commonmark/commonmark-spec
diff --git a/cli.zig b/cli.zig
new file mode 100644
index 0000000..4e94b1a
--- /dev/null
+++ b/cli.zig
@@ -0,0 +1,17 @@
+const std = @import("std");
+const aaronsw = @import("main.zig");
+
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+    const allocator = &gpa.allocator;
+
+    const str = try std.io.getStdIn().reader().readAllAlloc(allocator, 10241024);
+    defer allocator.free(str);
+
+    const writer = std.io.getStdOut().writer();
+
+    var doc = try aaronsw.parse(allocator, str);
+    defer doc.deinit();
+    try aaronsw.html.printChildren(writer, doc.children);
+}
diff --git a/commonmark-spec b/commonmark-spec
new file mode 160000
index 0000000..499ebba
--- /dev/null
+++ b/commonmark-spec
@@ -0,0 +1 @@
+Subproject commit 499ebbad90163881f51498c4c620652d0c66fb2e
diff --git a/html.zig b/html.zig
new file mode 100644
index 0000000..000b2ac
--- /dev/null
+++ b/html.zig
@@ -0,0 +1,31 @@
+const std = @import("std");
+const Children = @import("parser.zig").Children;
+
+pub fn printChildren(writer: anytype, children: Children) @TypeOf(writer).Error!void {
+    for (children.items) |child| {
+        switch (child) {
+            .document => unreachable,
+            .heading => |heading| {
+                try writer.print("<h{0}>{1s}</h{0}>", .{ heading.level, heading.text });
+            },
+            .paragraph => |paragraph| {
+                try writer.print("<p>{s}</p>", .{paragraph.text});
+            },
+            .block_quote => |block_quote| {
+                try writer.print("<blockquote>", .{});
+                try printChildren(writer, block_quote.children);
+                try writer.print("</blockquote>", .{});
+            },
+            .list => |list| {
+                try writer.print("<ul>", .{});
+                try printChildren(writer, list.children);
+                try writer.print("</ul>", .{});
+            },
+            .list_item => |list_item| {
+                try writer.print("<li>", .{});
+                try printChildren(writer, list_item.children);
+                try writer.print("</li>", .{});
+            },
+        }
+    }
+}
diff --git a/main.zig b/main.zig
new file mode 100644
index 0000000..a183b64
--- /dev/null
+++ b/main.zig
@@ -0,0 +1,4 @@
+pub const html = @import("html.zig");
+const parser = @import("parser.zig");
+pub const parse = parser.parse;
+pub const Children = parser.Children;
diff --git a/parser.zig b/parser.zig
new file mode 100644
index 0000000..517e3d7
--- /dev/null
+++ b/parser.zig
@@ -0,0 +1,374 @@
+const std = @import("std");
+
+fn last(slice: anytype) ?*@typeInfo(@TypeOf(slice)).Pointer.child {
+    if (slice.len == 0) return null;
+    return &slice[slice.len - 1];
+}
+
+pub const Children = std.ArrayList(Block);
+fn deinitChildren(children: *Children) void {
+    for (children.items) |*child| {
+        switch (child.*) {
+            .document => unreachable,
+            .paragraph => |*paragraph| paragraph.deinit(children.allocator),
+            .heading => |*heading| heading.deinit(children.allocator),
+            .block_quote => |*block_quote| block_quote.deinit(children.allocator),
+            .list => |*list| list.deinit(children.allocator),
+            .list_item => |*list_item| list_item.deinit(children.allocator),
+        }
+    }
+    children.deinit();
+}
+
+const Block = union(enum) {
+    document: Document,
+    paragraph: Paragraph,
+    heading: Heading,
+    block_quote: BlockQuote,
+    list: List,
+    list_item: ListItem,
+};
+
+const Document = struct {
+    children: Children,
+    pub fn deinit(self: *Document) void {
+        deinitChildren(&self.children);
+    }
+};
+const Paragraph = struct {
+    text: []u8,
+    pub fn deinit(self: *Paragraph, allocator: *std.mem.Allocator) void {
+        allocator.free(self.text);
+    }
+};
+const Heading = struct {
+    level: u8,
+    text: []u8,
+    pub fn deinit(self: *Heading, allocator: *std.mem.Allocator) void {
+        allocator.free(self.text);
+    }
+};
+const BlockQuote = struct {
+    children: Children,
+    pub fn deinit(self: *BlockQuote, _: *std.mem.Allocator) void {
+        deinitChildren(&self.children);
+    }
+};
+const List = struct {
+    pub const Kind = union(enum) {
+        bullet: u8,
+    };
+    kind: Kind,
+    tight: bool,
+    children: Children,
+    pub fn deinit(self: *List, _: *std.mem.Allocator) void {
+        deinitChildren(&self.children);
+    }
+};
+const ListItem = struct {
+    children: Children,
+    pub fn deinit(self: *ListItem, _: *std.mem.Allocator) void {
+        deinitChildren(&self.children);
+    }
+};
+
+fn arrayListAppend(comptime T: type, list: *std.ArrayList(T), item: T) !*T {
+    const ptr = try list.addOne();
+    ptr.* = item;
+    return ptr;
+}
+
+fn reallocAppend(
+    allocator: *std.mem.Allocator,
+    alloc_str: []u8,
+    append_str: []const u8,
+) ![]u8 {
+    const ptr = try allocator.realloc(alloc_str, alloc_str.len + append_str.len);
+    std.mem.copy(u8, ptr[alloc_str.len..], append_str);
+    return ptr;
+}
+
+fn detectIndent(str: []const u8) usize {
+    var indent: usize = 0;
+    for (str) |char| {
+        if (char == ' ') {
+            indent += 1;
+        } else break;
+    }
+    return indent;
+}
+
+fn checkIfBlockStillMatches(line: *[]const u8, block: Block) bool {
+    return switch (block) {
+        .document => true,
+        .heading => |_| false,
+        .paragraph => |_| {
+            if (line.*.len == 0) return false;
+            return true;
+        },
+        .block_quote => |_| {
+            if (std.mem.startsWith(u8, line.*, ">")) {
+                line.* = line.*[1..];
+                const indent = detectIndent(line.*);
+                line.* = line.*[indent..];
+                return true;
+            } else return false;
+        },
+        .list => |_| {
+            if (std.mem.startsWith(u8, line.*, "* ")) {
+                line.* = line.*[2..];
+                return true;
+            } else if (detectIndent(line.*) >= 4) {
+                return true;
+            } else return false;
+        },
+        .list_item => |_| {
+            const indent = detectIndent(line.*);
+            if (indent >= 4) {
+                line.* = line.*[4..];
+                return true;
+            } else return false;
+        },
+    };
+}
+fn getHeadingLevel(line: []const u8) u8 {
+    var level: u8 = 0;
+    for (line) |char| {
+        if (level == 6) break;
+        if (char == '#') level += 1 else break;
+    }
+    return level;
+}
+fn checkIfBlockStarts(allocator: *std.mem.Allocator, line: *[]const u8) !?Block { // block opened by `line` (marker consumed), else null
+    if (std.mem.startsWith(u8, line.*, ">")) {
+        line.* = line.*[1..]; // consume the '>' marker...
+        const indent = detectIndent(line.*);
+        line.* = line.*[indent..]; // ...and the spaces that follow it
+        return Block{ .block_quote = .{ .children = Children.init(allocator) } };
+    } else if (std.mem.startsWith(u8, line.*, "* ")) {
+        line.* = line.*[2..]; // consume the "* " bullet marker
+        return Block{ .list = .{
+            .children = Children.init(allocator),
+            .kind = .{ .bullet = '*' },
+            .tight = false,
+        } };
+    } else if (std.mem.startsWith(u8, line.*, "#")) {
+        const level = getHeadingLevel(line.*);
+        if (line.*[level..].len != 0) { // NOTE(review): a line made only of '#'s is not advanced here
+            if (line.*[level] != ' ' and line.*[level] != '\t') return null; // '#' run must be followed by a space or tab
+            line.* = line.*[level + 1 ..]; // consume the '#'s and the separator
+        }
+        return Block{ .heading = .{
+            .text = try allocator.alloc(u8, 0), // empty owned buffer; allocation failure is propagated
+            .level = level,
+        } };
+    }
+    return null;
+}
+
+const StackItem = struct {
+    block: *Block,
+    matched: bool,
+};
+const Stack = std.ArrayList(StackItem);
+
+// Verifies that the stack used to parse documents is not broken.
+fn verifyStack(stack: Stack) void {
+    for (stack.items) |item, index| {
+        if (index == 0) {
+            std.debug.assert(std.mem.eql(u8, @tagName(item.block.*), "document"));
+        } else {
+            const parent = stack.items[index - 1];
+            const parent_children = switch (parent.block.*) {
+                .document => |document| document.children.items,
+                .paragraph, .heading => {
+                    if (index == stack.items.len - 1) break else unreachable;
+                },
+                .block_quote => |block_quote| block_quote.children.items,
+                .list => |list| list.children.items,
+                .list_item => |list_item| list_item.children.items,
+            };
+            std.debug.assert(std.meta.eql(item.block, last(parent_children).?));
+        }
+    }
+}
+
+pub fn parse(allocator: *std.mem.Allocator, str: []const u8) !Document {
+    var doc_block = Block{ .document = Document{ .children = Children.init(allocator) } };
+    errdefer doc_block.document.deinit();
+
+    var stack = Stack.init(allocator);
+    defer stack.deinit();
+    const doc_stack_item = StackItem{ .block = &doc_block, .matched = true };
+    try stack.append(doc_stack_item);
+
+    var iter = std.mem.split(u8, str, "\n");
+
+    lineLoop: while (iter.next()) |line| {
+        var rest = line;
+        verifyStack(stack);
+
+        for (stack.items) |*item, i| {
+            item.matched = checkIfBlockStillMatches(&rest, item.block.*);
+            if (!item.matched) {
+                switch (item.block.*) {
+                    .paragraph, .list_item => {
+                        try stack.resize(i);
+                        break;
+                    },
+                    .block_quote => {
+                        if (rest.len == 0) {
+                            try stack.resize(i);
+                            break;
+                        }
+                    },
+                    else => {},
+                }
+            }
+        }
+
+        if (try checkIfBlockStarts(allocator, &rest)) |block| {
+            for (stack.items) |item, i| {
+                if (!item.matched or
+                    // If a new block started, finish the paragraph
+                    item.block.* == .paragraph)
+                {
+                    try stack.resize(i);
+                    break;
+                }
+            }
+
+            var last_block = stack.items[stack.items.len - 1].block;
+            const last_block_children = switch (last_block.*) {
+                .document => |*document| &document.children,
+                .heading, .paragraph => unreachable,
+                .block_quote => |*block_quote| &block_quote.children,
+                .list => |*list| &list.children,
+                .list_item => |*list_item| &list_item.children,
+            };
+            const new_block = try arrayListAppend(Block, last_block_children, block);
+            try stack.append(.{ .block = new_block, .matched = false });
+        }
+
+        if (rest.len == 0) continue :lineLoop;
+
+        var index = stack.items.len - 1;
+        while (true) : (index -= 1) {
+            switch (stack.items[index].block.*) {
+                .heading => |*heading| {
+                    heading.text = try reallocAppend(allocator, heading.text, rest);
+                    try stack.resize(index);
+                    break;
+                },
+                .paragraph => |*paragraph| {
+                    if (paragraph.text.len != 0) {
+                        paragraph.text = try reallocAppend(allocator, paragraph.text, " ");
+                    }
+                    paragraph.text = try reallocAppend(allocator, paragraph.text, rest);
+                    break;
+                },
+                .document => |*document| {
+                    const paragraph_tmp = .{ .text = try allocator.alloc(u8, 0) };
+                    const paragraph_block_tmp = .{ .paragraph = paragraph_tmp };
+                    const paragraph = try arrayListAppend(Block, &document.children, paragraph_block_tmp);
+                    try stack.append(.{ .block = paragraph, .matched = false });
+                    index = stack.items.len;
+                },
+                .block_quote => |*block_quote| {
+                    const paragraph_tmp = .{ .text = try allocator.alloc(u8, 0) };
+                    const paragraph_block_tmp = .{ .paragraph = paragraph_tmp };
+                    const paragraph = try arrayListAppend(Block, &block_quote.children, paragraph_block_tmp);
+                    try stack.append(.{ .block = paragraph, .matched = false });
+                    index = stack.items.len;
+                },
+                .list => |*list| {
+                    const item_tmp = .{ .children = Children.init(allocator) };
+                    const item_block_tmp = .{ .list_item = item_tmp };
+                    const item = try arrayListAppend(Block, &list.children, item_block_tmp);
+                    try stack.append(.{ .block = item, .matched = false });
+                    index = stack.items.len;
+                },
+                .list_item => |*list_item| {
+                    const paragraph_tmp = .{ .text = try allocator.alloc(u8, 0) };
+                    const paragraph_block_tmp = .{ .paragraph = paragraph_tmp };
+                    const paragraph = try arrayListAppend(Block, &list_item.children, paragraph_block_tmp);
+                    try stack.append(.{ .block = paragraph, .matched = false });
+                    index = stack.items.len;
+                },
+            }
+        }
+    }
+
+    // TODO: use std.log when I figure out how to make the messages print when testing
+    std.debug.print("parser: stack: {any}\n", .{stack.items});
+    verifyStack(stack);
+
+    return doc_block.document;
+}
+
+const testing = std.testing;
+
+test "block quotes" {
+    const str =
+        \\Hello
+        \\
+        \\> Block quotes are
+        \\written like so.
+        \\>
+        \\> They can span multiple paragraphs,
+        \\> if you like.
+    ;
+
+    var doc = try parse(std.testing.allocator, str);
+    defer doc.deinit();
+
+    try testing.expectEqual(@as(usize, 2), doc.children.items.len);
+
+    try testing.expectEqualStrings("paragraph", @tagName(doc.children.items[0]));
+    try testing.expectEqualStrings("Hello", doc.children.items[0].paragraph.text);
+
+    try testing.expectEqualStrings("block_quote", @tagName(doc.children.items[1]));
+    try testing.expectEqual(@as(usize, 2), doc.children.items[1].block_quote.children.items.len);
+    try testing.expectEqualStrings(
+        "paragraph",
+        @tagName(doc.children.items[1].block_quote.children.items[0]),
+    );
+    try testing.expectEqualStrings(
+        "Block quotes are written like so.",
+        doc.children.items[1].block_quote.children.items[0].paragraph.text,
+    );
+    try testing.expectEqualStrings(
+        "paragraph",
+        @tagName(doc.children.items[1].block_quote.children.items[1]),
+    );
+    try testing.expectEqualStrings(
+        "They can span multiple paragraphs, if you like.",
+        doc.children.items[1].block_quote.children.items[1].paragraph.text,
+    );
+}
+
+test "headings" {
+    const str =
+        \\Hello
+        \\## Hey how
+        \\Testing
+        \\# headings
+    ;
+
+    var doc = try parse(std.testing.allocator, str);
+    defer doc.deinit();
+
+    try testing.expectEqual(@as(usize, 4), doc.children.items.len);
+
+    try testing.expectEqualStrings("paragraph", @tagName(doc.children.items[0]));
+    try testing.expectEqualStrings("Hello", doc.children.items[0].paragraph.text);
+
+    try testing.expectEqualStrings("heading", @tagName(doc.children.items[1]));
+    try testing.expectEqualStrings("Hey how", doc.children.items[1].heading.text);
+
+    try testing.expectEqualStrings("paragraph", @tagName(doc.children.items[2]));
+    try testing.expectEqualStrings("Testing", doc.children.items[2].paragraph.text);
+
+    try testing.expectEqualStrings("heading", @tagName(doc.children.items[3]));
+    try testing.expectEqualStrings("headings", doc.children.items[3].heading.text);
+}
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..cf97108
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,29 @@
+# AaronSw
+
+[English](readme_en.md)
+
+Un parser de Markdown escrito en [Zig](https://ziglang.org/es).
+
+## Objetivos
+
+En este orden:
+
+* Ser útil (para un proyecto personal mío)
+* Ser sencillo y extensible
+* Seguir la especificación de [CommonMark](https://spec.commonmark.org)
+* Ser eficiente en recursos
+
+## Ejemplo de uso
+
+Ver [cli.zig](cli.zig)
+
+## CommonMark
+
+Actualmente: `114 passed, 538 failed, 0 errored, 0 skipped`
+
+Para correr los tests:
+
+```sh
+git submodule update --init commonmark-spec/
+./run-spec-tests.sh
+```
diff --git a/readme_en.md b/readme_en.md
new file mode 100644
index 0000000..692a343
--- /dev/null
+++ b/readme_en.md
@@ -0,0 +1,27 @@
+[Castellano](readme.md)
+
+A Markdown parser written in [Zig](https://ziglang.org).
+
+## Objectives
+
+In this order:
+
+* Be useful (for a secret personal project)
+* Be simple and extensible
+* Follow the [CommonMark specification](https://spec.commonmark.org)
+* Use resources efficiently
+
+## Example usage
+
+See [cli.zig](cli.zig)
+
+## CommonMark
+
+Currently: `114 passed, 538 failed, 0 errored, 0 skipped`
+
+To run the tests:
+
+```sh
+git submodule update --init commonmark-spec/
+./run-spec-tests.sh
+```
diff --git a/run-spec-tests.sh b/run-spec-tests.sh
new file mode 100755
index 0000000..7337256
--- /dev/null
+++ b/run-spec-tests.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -e # stop if the build or the cd fails, so the spec tests never run against a stale or missing ../cli
+zig build-exe cli.zig
+cd commonmark-spec
+python3 test/spec_tests.py --program ../cli