This commit is contained in:
Cat /dev/Nulo 2021-07-31 23:25:00 +00:00
commit c4b460894d
2 changed files with 205 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
zig-cache/
zig-out/

203
markdown.zig Normal file
View file

@ -0,0 +1,203 @@
pub const std = @import("std");
/// What a block parser returns on a match: the parsed `Block` in `result`
/// and the remaining, not yet consumed input in `rest`.
pub const Result = struct { result: Block, rest: []const u8 };
/// Block types that `Paragraph.parse` probes at every offset: a match of one
/// of these ends the paragraph being scanned.
const definedBlockTypes = [_]type{ Heading, CodeBlock };
/// All block types in the order the top-level `parse` tries them:
/// `definedBlockTypes` first, then `Paragraph`.
const blockTypes = definedBlockTypes ++ [_]type{Paragraph};
/// One parsed block of the document; the active tag tells which kind it is.
pub const Block = union(enum) {
/// Plain text block (see `Paragraph`).
paragraph: Paragraph,
/// `#`-prefixed heading line (see `Heading`).
heading: Heading,
/// Block delimited by triple backticks (see `CodeBlock`).
code_block: CodeBlock,
};
/// A run of text that is not any other block type.
pub const Paragraph = struct {
    /// Paragraph contents with surrounding spaces and newlines trimmed.
    text: []const u8,

    /// Reports whether a paragraph must stop right before `tail`: either one
    /// of `definedBlockTypes` parses at this position, or `tail` begins with
    /// an empty line ("\n\n").
    fn endsBefore(tail: []const u8) !bool {
        inline for (definedBlockTypes) |BlockType| {
            if ((try BlockType.parse(null, tail)) != null) return true;
        }
        return std.mem.startsWith(u8, tail, "\n\n");
    }

    /// Parses a paragraph from the start of `input`.
    ///
    /// The paragraph extends until another block type matches, an empty line
    /// is found, or the input ends. Returns null when `input` is empty or
    /// when the trimmed text would be empty. `rest` of the result begins at
    /// the position where the scan stopped. `allocator` is unused.
    pub fn parse(allocator: ?*std.mem.Allocator, input: []const u8) !?Result {
        _ = allocator;
        if (input.len == 0) return null;

        // Advance one byte at a time until something terminates the paragraph.
        var end: usize = 0;
        while (end < input.len and !(try endsBefore(input[end..]))) {
            end += 1;
        }

        const text = trim(input[0..end]);
        if (text.len == 0) return null;
        return Result{
            .result = .{ .paragraph = .{ .text = text } },
            .rest = input[end..],
        };
    }
};
/// A heading line: one to six `#` characters, a space, then the title.
pub const Heading = struct {
    /// Title text: everything after the "# " prefix up to the first newline.
    text: []const u8,
    /// Depth of the heading.
    level: Level,

    /// Heading depth; the integer value equals the number of `#` characters.
    pub const Level = enum(u3) {
        one = 1,
        two = 2,
        three = 3,
        four = 4,
        five = 5,
        six = 6,
    };

    /// Parses a heading from the start of `input`.
    ///
    /// Matches when `input` begins with N `#` characters (N being a valid
    /// `Level`) immediately followed by a space. Returns null otherwise.
    /// `rest` of the result begins at the newline that ended the title, or is
    /// empty when the title ran to the end of `input`. The allocator is
    /// unused.
    pub fn parse(_: ?*std.mem.Allocator, input: []const u8) !?Result {
        // Levels are numbered 1..N, so the field count is also the deepest level.
        const deepest = std.meta.fields(Level).len;

        // Length of the leading run of '#'.
        var hashes: usize = 0;
        while (hashes < input.len and input[hashes] == '#') hashes += 1;

        if (hashes == 0 or hashes > deepest) return null;
        // The run must be followed by exactly a space to count as a heading.
        if (hashes == input.len or input[hashes] != ' ') return null;

        const title_start = hashes + 1;
        const title = untilNewline(input[title_start..]);
        return Result{
            .result = .{
                .heading = .{
                    .text = title,
                    .level = @intToEnum(Level, @intCast(u3, hashes)),
                },
            },
            .rest = input[title_start + title.len ..],
        };
    }
};
/// A block delimited by triple-backtick fences.
pub const CodeBlock = struct {
    /// Text that follows the opening fence on the same line (may be empty).
    language: []const u8,
    /// Contents between the fences, with surrounding spaces and newlines
    /// trimmed.
    text: []const u8,

    const fence = "```";

    /// Parses a fenced code block from the start of `input`.
    ///
    /// Returns null when `input` does not start with a fence, when the
    /// opening fence line is not terminated by a newline, or when no closing
    /// fence follows. `rest` of the result begins right after the closing
    /// fence. The allocator is unused.
    pub fn parse(_: ?*std.mem.Allocator, input: []const u8) !?Result {
        if (!std.mem.startsWith(u8, input, fence)) return null;

        const after_fence = input[fence.len..];
        // Without a newline there is no body at all. Bail out instead of
        // slicing one past the end of `input`, which is what skipping the
        // (missing) newline would do.
        const newline = std.mem.indexOfScalar(u8, after_fence, '\n') orelse return null;
        const language = after_fence[0..newline];
        const body = after_fence[newline + 1 ..];

        const end = std.mem.indexOf(u8, body, fence) orelse return null;
        return Result{
            .result = .{
                .code_block = .{
                    .language = language,
                    .text = trim(body[0..end]),
                },
            },
            .rest = body[end + fence.len ..],
        };
    }
};
/// Returns the part of `input` that precedes its first '\n', or all of
/// `input` when it contains no newline.
pub fn untilNewline(input: []const u8) []const u8 {
    const line_end = std.mem.indexOfScalar(u8, input, '\n') orelse input.len;
    return input[0..line_end];
}
/// Returns the index of where the next block starts.
///
/// A block starts at the first offset where an empty line ("\n\n") begins or
/// where `Heading.parse` / `CodeBlock.parse` matches. Returns `input.len`
/// when no such offset exists.
pub fn untilNextBlock(input: []const u8) usize {
    for (input) |_, i| {
        const rest = input[i..];
        if (std.mem.startsWith(u8, rest, "\n\n")) return i;
        // The block parsers take (allocator, input) and return an error
        // union; neither one allocates, so null is passed. A parser error is
        // treated as "no block here" because this function has no error
        // channel in its return type.
        if ((Heading.parse(null, rest) catch null) != null) return i;
        if ((CodeBlock.parse(null, rest) catch null) != null) return i;
    }
    return input.len;
}
/// Strips spaces and newlines from both ends of `input`.
pub fn trim(input: []const u8) []const u8 {
    const stripped_chars = " \n";
    const without_prefix = std.mem.trimLeft(u8, input, stripped_chars);
    return std.mem.trimRight(u8, without_prefix, stripped_chars);
}
/// Parsed document: the list of blocks that `parse` returns. The caller
/// releases it with `deinit`.
pub const Tree = std.ArrayList(Block);
/// Tries every entry of `blockTypes`, in order, at the start of `input` and
/// returns the first match, or null when none of them matches.
fn parseNextBlock(allocator: *std.mem.Allocator, input: []const u8) !?Result {
    inline for (blockTypes) |BlockType| {
        if (try BlockType.parse(allocator, input)) |found| return found;
    }
    return null;
}

/// Parses `input` into a list of blocks.
///
/// The input is trimmed of surrounding spaces and newlines, then consumed
/// block by block. Where no block type matches, a single byte is skipped.
/// The returned `Tree` is owned by the caller, who must call `deinit` on it;
/// on error the partially built list is released here.
pub fn parse(
    allocator: *std.mem.Allocator,
    input: []const u8,
) !Tree {
    var blocks = Tree.init(allocator);
    errdefer blocks.deinit();

    var remaining = trim(input);
    while (remaining.len > 0) {
        if (try parseNextBlock(allocator, remaining)) |found| {
            try blocks.append(found.result);
            remaining = found.rest;
        } else {
            // Nothing starts here (e.g. a separator newline): step over it.
            remaining = remaining[1..];
        }
    }
    return blocks;
}
/// Writes `text` to `writer` with `&`, `<` and `>` replaced by their HTML
/// entities. Unescaped runs are written in one call each.
fn writeHtmlEscaped(writer: anytype, text: []const u8) !void {
    var run_start: usize = 0;
    for (text) |byte, i| {
        const entity: []const u8 = switch (byte) {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            else => continue,
        };
        try writer.writeAll(text[run_start..i]);
        try writer.writeAll(entity);
        run_start = i + 1;
    }
    try writer.writeAll(text[run_start..]);
}

/// Reads a document from stdin, parses it and writes the resulting HTML to
/// stdout.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = &gpa.allocator;

    // The second argument is the maximum number of bytes accepted from stdin.
    const input = try std.io.getStdIn().readToEndAlloc(allocator, 10241024);
    defer allocator.free(input);

    const stdout = std.io.getStdOut().writer();
    const parsed = try parse(allocator, input);
    defer parsed.deinit();

    for (parsed.items) |block| {
        switch (block) {
            // NOTE(review): paragraph and heading text is emitted verbatim,
            // so any markup in the input passes through to the output.
            // Confirm that this is intended before rendering untrusted input.
            .paragraph => |p| try stdout.print("<p>{s}</p>", .{p.text}),
            .heading => |h| try stdout.print("<h{0}>{1s}</h{0}>", .{ @enumToInt(h.level), h.text }),
            // Code must be shown literally, so markup characters are escaped.
            .code_block => |c| {
                try stdout.writeAll("<pre><code>");
                try writeHtmlEscaped(stdout, c.text);
                try stdout.writeAll("</code></pre>");
            },
        }
    }
}
// Checks that consecutive headings, a paragraph, and a heading directly
// following a paragraph are split into the expected blocks.
test "parse headings and paragraph" {
    const Expected = struct {
        tag: []const u8,
        text: []const u8,
        /// Null means the level is not checked for this entry.
        level: ?Heading.Level = null,
    };
    const expected = [_]Expected{
        .{ .tag = "heading", .text = "Hola", .level = Heading.Level.one },
        .{ .tag = "heading", .text = "Hola", .level = Heading.Level.two },
        .{ .tag = "heading", .text = "Heey", .level = Heading.Level.three },
        .{ .tag = "paragraph", .text = ":)" },
        .{ .tag = "heading", .text = "Hola mundo" },
    };

    const allocator = std.testing.allocator;
    const input =
        \\# Hola
        \\## Hola
        \\
        \\### Heey
        \\
        \\:)
        \\## Hola mundo
    ;
    const parsed = try parse(allocator, input);
    defer parsed.deinit();

    try std.testing.expectEqual(@as(usize, expected.len), parsed.items.len);
    for (expected) |want, i| {
        const got = parsed.items[i];
        // The tag is verified first, so the switch below only sees the
        // variants listed in `expected`.
        try std.testing.expectEqualStrings(want.tag, @tagName(got));
        switch (got) {
            .heading => |h| {
                if (want.level) |level| try std.testing.expect(h.level == level);
                try std.testing.expectEqualStrings(want.text, h.text);
            },
            .paragraph => |p| try std.testing.expectEqualStrings(want.text, p.text),
            .code_block => unreachable,
        }
    }
}