Initial commit
This commit is contained in:
commit
b58c5ce8ca
10 changed files with 494 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
cli
|
||||||
|
zig-cache/
|
||||||
|
zig-out/
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[submodule "commonmark-spec"]
|
||||||
|
path = commonmark-spec
|
||||||
|
url = https://github.com/commonmark/commonmark-spec
|
17
cli.zig
Normal file
17
cli.zig
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const aaronsw = @import("main.zig");
|
||||||
|
|
||||||
|
/// Reads a Markdown document from standard input, parses it and writes the
/// resulting HTML to standard output.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = &gpa.allocator;

    // Upper bound, in bytes, on how much input is accepted.
    // NOTE(review): 10241024 looks like it may have been meant as a
    // multiple of 1024 * 1024 - confirm the intended limit.
    const max_input_bytes = 10241024;

    const stdin = std.io.getStdIn().reader();
    const input = try stdin.readAllAlloc(allocator, max_input_bytes);
    defer allocator.free(input);

    var doc = try aaronsw.parse(allocator, input);
    defer doc.deinit();

    const stdout = std.io.getStdOut().writer();
    try aaronsw.html.printChildren(stdout, doc.children);
}
|
1
commonmark-spec
Submodule
1
commonmark-spec
Submodule
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 499ebbad90163881f51498c4c620652d0c66fb2e
|
31
html.zig
Normal file
31
html.zig
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
const std = @import("std");
|
||||||
|
const Children = @import("parser.zig").Children;
|
||||||
|
|
||||||
|
/// Writes the HTML rendering of every block in `children` to `writer`,
/// recursing into the children of container blocks.
///
/// NOTE(review): heading and paragraph text is written verbatim (no HTML
/// escaping is applied here), and every list is rendered as `<ul>`.
pub fn printChildren(writer: anytype, children: Children) @TypeOf(writer).Error!void {
    for (children.items) |block| {
        switch (block) {
            // A document appearing as a child is treated as impossible.
            .document => unreachable,
            .heading => |h| try writer.print("<h{0}>{1s}</h{0}>", .{ h.level, h.text }),
            .paragraph => |p| try writer.print("<p>{s}</p>", .{p.text}),
            .block_quote => |quote| {
                try writer.writeAll("<blockquote>");
                try printChildren(writer, quote.children);
                try writer.writeAll("</blockquote>");
            },
            .list => |ul| {
                try writer.writeAll("<ul>");
                try printChildren(writer, ul.children);
                try writer.writeAll("</ul>");
            },
            .list_item => |li| {
                try writer.writeAll("<li>");
                try printChildren(writer, li.children);
                try writer.writeAll("</li>");
            },
        }
    }
}
|
4
main.zig
Normal file
4
main.zig
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
pub const html = @import("html.zig");
|
||||||
|
const parser = @import("parser.zig");
|
||||||
|
pub const parse = parser.parse;
|
||||||
|
pub const Children = parser.Children;
|
374
parser.zig
Normal file
374
parser.zig
Normal file
|
@ -0,0 +1,374 @@
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Returns a pointer to the final element of `slice`, or null when the slice
/// is empty.
fn last(slice: anytype) ?*@typeInfo(@TypeOf(slice)).Pointer.child {
    return if (slice.len > 0) &slice[slice.len - 1] else null;
}
|
||||||
|
|
||||||
|
/// Growable list of blocks held by a container block.
pub const Children = std.ArrayList(Block);

/// Calls `deinit` on every block stored in `children`, passing the list's
/// allocator, and then releases the list itself.
fn deinitChildren(children: *Children) void {
    const allocator = children.allocator;
    for (children.items) |*block| {
        switch (block.*) {
            // A document stored as a child is treated as impossible.
            .document => unreachable,
            .paragraph => |*p| p.deinit(allocator),
            .heading => |*h| h.deinit(allocator),
            .block_quote => |*quote| quote.deinit(allocator),
            .list => |*l| l.deinit(allocator),
            .list_item => |*li| li.deinit(allocator),
        }
    }
    children.deinit();
}
|
||||||
|
|
||||||
|
/// One node of the parsed document tree; the active tag selects which kind
/// of block the node is.
const Block = union(enum) {
    /// Root node holding the top-level blocks.
    document: Document,
    /// Text-only block.
    paragraph: Paragraph,
    /// Text-only block carrying a heading level.
    heading: Heading,
    /// Container block with its own children.
    block_quote: BlockQuote,
    /// Container block with its own children.
    list: List,
    /// Container block with its own children.
    list_item: ListItem,
};
|
||||||
|
|
||||||
|
/// Root of a parsed document.
const Document = struct {
    /// Top-level blocks of the document.
    children: Children,

    /// Releases every child block and the child list itself.
    pub fn deinit(self: *@This()) void {
        deinitChildren(&self.children);
    }
};
|
||||||
|
/// A block that carries only text.
const Paragraph = struct {
    /// Paragraph text; released by `deinit`.
    text: []u8,

    /// Frees `text` using `allocator`.
    pub fn deinit(self: *@This(), allocator: *std.mem.Allocator) void {
        allocator.free(self.text);
    }
};
|
||||||
|
/// A heading block: a level plus its text.
const Heading = struct {
    /// Heading level; the HTML printer emits it as the `N` in `<hN>`.
    level: u8,
    /// Heading text; released by `deinit`.
    text: []u8,

    /// Frees `text` using `allocator`.
    pub fn deinit(self: *@This(), allocator: *std.mem.Allocator) void {
        allocator.free(self.text);
    }
};
|
||||||
|
/// A block quote: a container for nested blocks.
const BlockQuote = struct {
    /// Blocks nested inside the quote.
    children: Children,

    /// Releases the nested blocks. The allocator argument is ignored; the
    /// child list is released through its own allocator.
    pub fn deinit(self: *@This(), _: *std.mem.Allocator) void {
        deinitChildren(&self.children);
    }
};
|
||||||
|
/// A list block: a container for nested blocks plus list metadata.
const List = struct {
    /// Which flavour of list this is.
    pub const Kind = union(enum) {
        /// Bullet list; the payload is the bullet character.
        bullet: u8,
    };

    kind: Kind,
    // NOTE(review): presumably the CommonMark "tight list" flag - confirm;
    // nothing in this file reads it.
    tight: bool,
    /// Blocks nested inside the list.
    children: Children,

    /// Releases the nested blocks. The allocator argument is ignored; the
    /// child list is released through its own allocator.
    pub fn deinit(self: *@This(), _: *std.mem.Allocator) void {
        deinitChildren(&self.children);
    }
};
|
||||||
|
/// A list item: a container for nested blocks.
const ListItem = struct {
    /// Blocks nested inside the item.
    children: Children,

    /// Releases the nested blocks. The allocator argument is ignored; the
    /// child list is released through its own allocator.
    pub fn deinit(self: *@This(), _: *std.mem.Allocator) void {
        deinitChildren(&self.children);
    }
};
|
||||||
|
|
||||||
|
/// Appends `item` to `list` and returns a pointer to the stored copy.
/// The pointer refers to the list's storage, so it is only valid until the
/// list is next resized.
fn arrayListAppend(comptime T: type, list: *std.ArrayList(T), item: T) !*T {
    try list.append(item);
    return &list.items[list.items.len - 1];
}
|
||||||
|
|
||||||
|
/// Grows the allocation `alloc_str` by `append_str.len` bytes, copies
/// `append_str` into the new tail and returns the resized slice.
/// `alloc_str` must not be used after a successful call; use the returned
/// slice instead.
fn reallocAppend(
    allocator: *std.mem.Allocator,
    alloc_str: []u8,
    append_str: []const u8,
) ![]u8 {
    const old_len = alloc_str.len;
    const grown = try allocator.realloc(alloc_str, old_len + append_str.len);
    std.mem.copy(u8, grown[old_len..], append_str);
    return grown;
}
|
||||||
|
|
||||||
|
/// Counts the space characters at the start of `str`. Only ' ' is counted;
/// any other byte (tabs included) ends the run.
fn detectIndent(str: []const u8) usize {
    var count: usize = 0;
    while (count < str.len and str[count] == ' ') count += 1;
    return count;
}
|
||||||
|
|
||||||
|
/// Decides whether the already-open `block` continues on `line`.
/// When the block's continuation marker is found it is stripped from
/// `line`, so that the caller can test the nested blocks against the rest.
fn checkIfBlockStillMatches(line: *[]const u8, block: Block) bool {
    return switch (block) {
        .document => true,
        // A heading never continues onto another line.
        .heading => false,
        // A paragraph continues on any non-empty line.
        .paragraph => line.*.len != 0,
        .block_quote => {
            if (!std.mem.startsWith(u8, line.*, ">")) return false;
            // Drop the marker and the spaces that follow it.
            const after_marker = line.*[1..];
            line.* = after_marker[detectIndent(after_marker)..];
            return true;
        },
        .list => {
            if (std.mem.startsWith(u8, line.*, "* ")) {
                line.* = line.*[2..];
                return true;
            }
            // An indented line also keeps the list open; the indentation is
            // left in place here.
            return detectIndent(line.*) >= 4;
        },
        .list_item => {
            if (detectIndent(line.*) < 4) return false;
            line.* = line.*[4..];
            return true;
        },
    };
}
|
||||||
|
/// Counts the '#' characters at the start of `line`, stopping at 6.
fn getHeadingLevel(line: []const u8) u8 {
    const max_level = 6;
    var level: u8 = 0;
    while (level < max_level and level < line.len and line[level] == '#') level += 1;
    return level;
}
|
||||||
|
/// Checks whether `line` opens a new block.
///
/// Recognised openers are ">" (block quote), "* " (bullet list) and a run of
/// '#' (heading). On a match the opener is stripped from `line` and a fresh,
/// empty block of that kind is returned; otherwise `line` is left untouched
/// and null is returned.
///
/// Returns an error only if allocating the heading's (empty) text fails.
fn checkIfBlockStarts(allocator: *std.mem.Allocator, line: *[]const u8) !?Block {
    if (std.mem.startsWith(u8, line.*, ">")) {
        // Drop the marker and the spaces that follow it.
        line.* = line.*[1..];
        const indent = detectIndent(line.*);
        line.* = line.*[indent..];
        return Block{ .block_quote = .{ .children = Children.init(allocator) } };
    } else if (std.mem.startsWith(u8, line.*, "* ")) {
        line.* = line.*[2..];
        return Block{ .list = .{
            .children = Children.init(allocator),
            .kind = .{ .bullet = '*' },
            .tight = false,
        } };
    } else if (std.mem.startsWith(u8, line.*, "#")) {
        const level = getHeadingLevel(line.*);
        if (line.*[level..].len != 0) {
            // The '#' run must be followed by a space or a tab. The two
            // comparisons have to be joined with `and`: with `or` the test is
            // true for every byte and no heading with text is ever accepted.
            const separator = line.*[level];
            if (separator != ' ' and separator != '\t') return null;
            line.* = line.*[level + 1 ..];
        }
        // NOTE(review): when nothing follows the '#' run the line is not
        // consumed, so the hashes themselves end up as the heading text.
        return Block{ .heading = .{
            .text = try allocator.alloc(u8, 0),
            .level = level,
        } };
    }
    return null;
}
|
||||||
|
|
||||||
|
/// One entry of the parser's stack of currently open blocks.
const StackItem = struct {
    /// The open block this entry refers to.
    block: *Block,
    /// Whether the block matched the line currently being processed.
    matched: bool,
};

/// Stack of open blocks, outermost first.
const Stack = std.ArrayList(StackItem);
|
||||||
|
|
||||||
|
// Sanity-checks the parser stack: the bottom entry must be the document and
// every other entry must be the last child of the entry below it.
fn verifyStack(stack: Stack) void {
    for (stack.items) |item, index| {
        if (index == 0) {
            std.debug.assert(item.block.* == .document);
            continue;
        }

        const parent = stack.items[index - 1];
        const siblings = switch (parent.block.*) {
            .document => |document| document.children.items,
            .block_quote => |block_quote| block_quote.children.items,
            .list => |list| list.children.items,
            .list_item => |list_item| list_item.children.items,
            // Paragraphs and headings have no children. Such a parent is
            // tolerated only when the entry being checked is the last one,
            // in which case the scan simply stops.
            .paragraph, .heading => {
                if (index == stack.items.len - 1) break else unreachable;
            },
        };
        std.debug.assert(item.block == last(siblings).?);
    }
}
|
||||||
|
|
||||||
|
/// Appends a new paragraph with empty text to `children` and returns a
/// pointer to it.
fn appendEmptyParagraph(allocator: *std.mem.Allocator, children: *Children) !*Block {
    const text = try allocator.alloc(u8, 0);
    errdefer allocator.free(text);
    return arrayListAppend(Block, children, Block{ .paragraph = .{ .text = text } });
}

/// Parses the Markdown text `str` into a tree of blocks.
///
/// The input is processed line by line (split on "\n"). For every line:
///   1. each open block is asked whether it continues on this line;
///   2. if the rest of the line opens a new block, that block is attached to
///      the innermost open container that still matched;
///   3. whatever text is left is appended to the innermost open block,
///      creating list items and paragraphs on the way as needed.
///
/// All text is copied into memory obtained from `allocator`. The caller owns
/// the returned document and must call `deinit` on it. On error everything
/// built so far is released.
pub fn parse(allocator: *std.mem.Allocator, str: []const u8) !Document {
    const log = std.log.scoped(.parser);

    var doc_block = Block{ .document = Document{ .children = Children.init(allocator) } };
    errdefer doc_block.document.deinit();

    // Stack of currently open blocks, outermost (the document) first.
    var stack = Stack.init(allocator);
    defer stack.deinit();
    try stack.append(StackItem{ .block = &doc_block, .matched = true });

    var iter = std.mem.split(u8, str, "\n");

    lineLoop: while (iter.next()) |line| {
        var rest = line;
        verifyStack(stack);

        // Step 1: find out which open blocks continue on this line. Each
        // block that matches strips its own marker from `rest`.
        for (stack.items) |*item, i| {
            item.matched = checkIfBlockStillMatches(&rest, item.block.*);
            if (!item.matched) {
                switch (item.block.*) {
                    // These are closed as soon as they stop matching.
                    .paragraph, .list_item => {
                        try stack.resize(i);
                        break;
                    },
                    // An unmatched block quote is closed only by an empty
                    // line; otherwise it stays open for now.
                    .block_quote => {
                        if (rest.len == 0) {
                            try stack.resize(i);
                            break;
                        }
                    },
                    else => {},
                }
            }
        }

        // Step 2: open a new block if the rest of the line starts one.
        if (try checkIfBlockStarts(allocator, &rest)) |block| {
            for (stack.items) |item, i| {
                // Close everything from the first unmatched block on; a new
                // block also finishes an open paragraph.
                if (!item.matched or item.block.* == .paragraph) {
                    try stack.resize(i);
                    break;
                }
            }

            const container = stack.items[stack.items.len - 1].block;
            const container_children = switch (container.*) {
                .document => |*document| &document.children,
                .heading, .paragraph => unreachable,
                .block_quote => |*block_quote| &block_quote.children,
                .list => |*list| &list.children,
                .list_item => |*list_item| &list_item.children,
            };
            const new_block = try arrayListAppend(Block, container_children, block);
            try stack.append(.{ .block = new_block, .matched = false });
        }

        if (rest.len == 0) continue :lineLoop;

        // Step 3: hand the remaining text to the innermost open block. A
        // container on top of the stack first gets a suitable child pushed,
        // and the loop then looks at that child.
        while (true) {
            const top = stack.items[stack.items.len - 1].block;
            switch (top.*) {
                .heading => |*heading| {
                    heading.text = try reallocAppend(allocator, heading.text, rest);
                    // A heading is complete once it has its text.
                    try stack.resize(stack.items.len - 1);
                    break;
                },
                .paragraph => |*paragraph| {
                    // Lines of one paragraph are joined with a single space.
                    if (paragraph.text.len != 0) {
                        paragraph.text = try reallocAppend(allocator, paragraph.text, " ");
                    }
                    paragraph.text = try reallocAppend(allocator, paragraph.text, rest);
                    break;
                },
                .document => |*document| {
                    const paragraph = try appendEmptyParagraph(allocator, &document.children);
                    try stack.append(.{ .block = paragraph, .matched = false });
                },
                .block_quote => |*block_quote| {
                    const paragraph = try appendEmptyParagraph(allocator, &block_quote.children);
                    try stack.append(.{ .block = paragraph, .matched = false });
                },
                .list => |*list| {
                    const item = try arrayListAppend(
                        Block,
                        &list.children,
                        Block{ .list_item = .{ .children = Children.init(allocator) } },
                    );
                    try stack.append(.{ .block = item, .matched = false });
                },
                .list_item => |*list_item| {
                    const paragraph = try appendEmptyParagraph(allocator, &list_item.children);
                    try stack.append(.{ .block = paragraph, .matched = false });
                },
            }
        }
    }

    // Goes through std.log so that the dump can be filtered by level and
    // scope instead of always being written to stderr.
    log.debug("stack: {any}", .{stack.items});
    verifyStack(stack);

    return doc_block.document;
}
|
||||||
|
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
// A paragraph followed by a block quote that contains two paragraphs, one of
// which continues on a line without the ">" marker.
test "block quotes" {
    const str =
        \\Hello
        \\
        \\> Block quotes are
        \\written like so.
        \\>
        \\> They can span multiple paragraphs,
        \\> if you like.
    ;

    var doc = try parse(std.testing.allocator, str);
    defer doc.deinit();

    const top = doc.children.items;
    try testing.expectEqual(@as(usize, 2), top.len);

    try testing.expectEqualStrings("paragraph", @tagName(top[0]));
    try testing.expectEqualStrings("Hello", top[0].paragraph.text);

    try testing.expectEqualStrings("block_quote", @tagName(top[1]));
    const quoted = top[1].block_quote.children.items;
    try testing.expectEqual(@as(usize, 2), quoted.len);

    try testing.expectEqualStrings("paragraph", @tagName(quoted[0]));
    try testing.expectEqualStrings(
        "Block quotes are written like so.",
        quoted[0].paragraph.text,
    );

    try testing.expectEqualStrings("paragraph", @tagName(quoted[1]));
    try testing.expectEqualStrings(
        "They can span multiple paragraphs, if you like.",
        quoted[1].paragraph.text,
    );
}
|
||||||
|
|
||||||
|
// Headings interleaved with paragraphs: a heading line ends the paragraph
// before it and never absorbs the line after it.
test "headings" {
    const str =
        \\Hello
        \\## Hey how
        \\Testing
        \\# headings
    ;

    var doc = try parse(std.testing.allocator, str);
    defer doc.deinit();

    const blocks = doc.children.items;
    try testing.expectEqual(@as(usize, 4), blocks.len);

    try testing.expectEqualStrings("paragraph", @tagName(blocks[0]));
    try testing.expectEqualStrings("Hello", blocks[0].paragraph.text);

    try testing.expectEqualStrings("heading", @tagName(blocks[1]));
    try testing.expectEqualStrings("Hey how", blocks[1].heading.text);

    try testing.expectEqualStrings("paragraph", @tagName(blocks[2]));
    try testing.expectEqualStrings("Testing", blocks[2].paragraph.text);

    try testing.expectEqualStrings("heading", @tagName(blocks[3]));
    try testing.expectEqualStrings("headings", blocks[3].heading.text);
}
|
29
readme.md
Normal file
29
readme.md
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
# AaronSw
|
||||||
|
|
||||||
|
[English](readme_en.md)
|
||||||
|
|
||||||
|
Un parser de Markdown escrito en [Zig](https://ziglang.org/es).
|
||||||
|
|
||||||
|
## Objetivos
|
||||||
|
|
||||||
|
En este orden:
|
||||||
|
|
||||||
|
* Ser útil (para un proyecto personal mío)
|
||||||
|
* Ser sencillo y extensible
|
||||||
|
* Seguir la especificación de [CommonMark](https://spec.commonmark.org)
|
||||||
|
* Ser eficiente en recursos
|
||||||
|
|
||||||
|
## Ejemplo de uso
|
||||||
|
|
||||||
|
Ver [cli.zig](cli.zig)
|
||||||
|
|
||||||
|
## CommonMark
|
||||||
|
|
||||||
|
Actualmente: `114 passed, 538 failed, 0 errored, 0 skipped`
|
||||||
|
|
||||||
|
Para correr los tests:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git submodule update --init commonmark-spec/
|
||||||
|
./run-spec-tests.sh
|
||||||
|
```
|
27
readme_en.md
Normal file
27
readme_en.md
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
[Castellano](readme.md)
|
||||||
|
|
||||||
|
A Markdown parser written in [Zig](https://ziglang.org).
|
||||||
|
|
||||||
|
## Objectives
|
||||||
|
|
||||||
|
In this order:
|
||||||
|
|
||||||
|
* Be useful (for a secret personal project)
|
||||||
|
* Be simple and extensible
|
||||||
|
* Follow the [CommonMark specification](https://spec.commonmark.org)
|
||||||
|
* Use resources efficiently
|
||||||
|
|
||||||
|
## Example usage
|
||||||
|
|
||||||
|
See [cli.zig](cli.zig)
|
||||||
|
|
||||||
|
## CommonMark
|
||||||
|
|
||||||
|
Currently: `114 passed, 538 failed, 0 errored, 0 skipped`
|
||||||
|
|
||||||
|
To run the tests:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git submodule update --init commonmark-spec/
|
||||||
|
./run-spec-tests.sh
|
||||||
|
```
|
5
run-spec-tests.sh
Executable file
5
run-spec-tests.sh
Executable file
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/sh
# Builds the CLI and runs the CommonMark spec tests against it.

# Stop at the first failing command, so that a failed build or a missing
# commonmark-spec checkout never runs the tests against a stale binary.
set -e

zig build-exe cli.zig
cd commonmark-spec
python3 test/spec_tests.py --program ../cli
|
Loading…
Reference in a new issue