//! Minimal Markdown parser with an HTML-emitting `main`.
//!
//! The input is split into a flat list of blocks: ATX headings (`# ` .. `###### `),
//! fenced code blocks (```), and paragraphs (everything else).
//!
//! Provenance: `markdown.zig` as added by commit
//! c4b460894d40e0f78342c2b502b8408b1c2f77bc ("Init", author Nulo,
//! Sat Jul 31 23:25:00 2021 +0000). The same commit added a `.gitignore`
//! containing `zig-cache/` and `zig-out/`.

pub const std = @import("std");

/// A successfully parsed block together with the input that follows it.
pub const Result = struct { result: Block, rest: []const u8 };

// Block types that are recognised by an explicit marker. `Paragraph` is kept
// out of this list because it is the fallback and uses these parsers to find
// where it ends.
const definedBlockTypes = [_]type{ Heading, CodeBlock };
// Order matters: `parse` tries these front to back, so `Paragraph` goes last.
const blockTypes = definedBlockTypes ++ [_]type{Paragraph};

/// One top-level element of a document. All slices point into the parsed input.
pub const Block = union(enum) {
    paragraph: Paragraph,
    heading: Heading,
    code_block: CodeBlock,
};

pub const Paragraph = struct {
    /// Paragraph contents with surrounding spaces and newlines removed.
    text: []const u8,

    /// Parses a paragraph from the start of `input`.
    ///
    /// The paragraph runs until a blank line ("\n\n"), until a line that starts
    /// a heading or a code block, or until the end of `input`. Returns `null`
    /// when the text collected this way is empty after trimming. The allocator
    /// is unused; it is only accepted so every block type has the same `parse`
    /// signature.
    pub fn parse(allocator: ?*std.mem.Allocator, input: []const u8) !?Result {
        _ = allocator;
        const end = try nextBlockStart(input);
        const text = trim(input[0..end]);
        if (text.len == 0) return null;
        return Result{
            .result = .{ .paragraph = .{ .text = text } },
            .rest = input[end..],
        };
    }
};

pub const Heading = struct {
    /// Heading text: the rest of the line after the marker, not trimmed.
    text: []const u8,
    level: Level,

    /// Heading depth; the integer value equals the number of `#` in the marker.
    pub const Level = enum(u3) {
        one = 1,
        two = 2,
        three = 3,
        four = 4,
        five = 5,
        six = 6,
    };

    /// Parses a heading from the start of `input`.
    ///
    /// `input` must begin with one to six `#` followed by a single space. The
    /// returned `rest` starts at the newline that ends the heading line (or is
    /// empty at end of input). Returns `null` if `input` is not a heading.
    pub fn parse(_: ?*std.mem.Allocator, input: []const u8) !?Result {
        inline for (std.meta.fields(Level)) |level| {
            // Built at compile time: "# ", "## ", ... Because the marker ends in
            // a space, "## x" does not match the shorter "# " marker.
            const marker = ("#" ** level.value) ++ " ";
            if (std.mem.startsWith(u8, input, marker)) {
                const text = untilNewline(input[marker.len..]);
                return Result{
                    .result = .{
                        .heading = .{
                            .text = text,
                            .level = @intToEnum(Level, level.value),
                        },
                    },
                    .rest = input[marker.len + text.len ..],
                };
            }
        }
        return null;
    }
};

pub const CodeBlock = struct {
    /// Text between the opening fence and the end of that line (may be empty).
    language: []const u8,
    /// Block contents with surrounding spaces and newlines removed.
    text: []const u8,

    /// Parses a fenced code block from the start of `input`.
    ///
    /// Returns `null` unless `input` starts with a fence, the fence line is
    /// terminated by a newline, and a closing fence follows. Previously an
    /// opening fence with no newline after it sliced past the end of `input`.
    pub fn parse(_: ?*std.mem.Allocator, input: []const u8) !?Result {
        const fence = "```";
        if (!std.mem.startsWith(u8, input, fence)) return null;

        const after_fence = input[fence.len..];
        const line_end = std.mem.indexOfScalar(u8, after_fence, '\n') orelse return null;
        const body = after_fence[line_end + 1 ..];
        const close = std.mem.indexOf(u8, body, fence) orelse return null;

        return Result{
            .result = .{
                .code_block = .{
                    .language = after_fence[0..line_end],
                    .text = trim(body[0..close]),
                },
            },
            .rest = body[close + fence.len ..],
        };
    }
};

/// Returns `input` up to, but not including, its first newline, or all of
/// `input` if it contains none.
pub fn untilNewline(input: []const u8) []const u8 {
    const index = std.mem.indexOfScalar(u8, input, '\n') orelse return input;
    return input[0..index];
}

// Shared scanner behind `Paragraph.parse` and `untilNextBlock`.
//
// Returns the first index at which a blank line ("\n\n") begins, or at which a
// heading / code block begins on a line that so far holds only spaces. Returns
// `input.len` if there is no such index. Restricting the marker probe to the
// start of a line keeps text such as "C# is nice" inside the paragraph.
fn nextBlockStart(input: []const u8) !usize {
    var at_line_start = true;
    for (input) |byte, i| {
        const rest = input[i..];
        if (std.mem.startsWith(u8, rest, "\n\n")) return i;
        if (at_line_start) {
            inline for (definedBlockTypes) |BlockType| {
                if ((try BlockType.parse(null, rest)) != null) return i;
            }
        }
        at_line_start = switch (byte) {
            '\n' => true,
            ' ' => at_line_start,
            else => false,
        };
    }
    return input.len;
}

/// Returns the index of where the next block starts.
///
/// That is the first blank line, or the first line that begins a heading or a
/// code block; `input.len` if there is none.
pub fn untilNextBlock(input: []const u8) usize {
    // This function has no way to report an error, so a failing block parser is
    // treated as "no further block".
    return nextBlockStart(input) catch input.len;
}

/// Strips leading and trailing spaces and newlines.
pub fn trim(input: []const u8) []const u8 {
    return std.mem.trim(u8, input, " \n");
}

/// Flat list of parsed blocks, in document order.
pub const Tree = std.ArrayList(Block);

/// Parses `input` into a list of blocks.
///
/// The caller owns the returned list and must call `deinit` on it. The blocks
/// hold slices of `input`, so `input` must outlive the list.
pub fn parse(
    allocator: *std.mem.Allocator,
    input: []const u8,
) !Tree {
    var tree = Tree.init(allocator);
    errdefer tree.deinit();

    var rest = trim(input);
    while (rest.len > 0) {
        var ran = false;
        inline for (blockTypes) |blockType| {
            if (try blockType.parse(allocator, rest)) |result| {
                try tree.append(result.result);
                rest = result.rest;
                // It would be better to just continue the outer while loop but
                // that makes the compiler crash.
                ran = true;
                break;
            }
        }
        // Nothing matched here (e.g. a block separator): skip one byte so the
        // loop always makes progress.
        if (!ran) rest = rest[1..];
    }
    return tree;
}

// Writes `text` to `writer`, replacing `&`, `<` and `>` with HTML entities.
// Unchanged runs are written in one call each rather than byte by byte.
fn writeHtmlEscaped(writer: anytype, text: []const u8) !void {
    var run_start: usize = 0;
    for (text) |byte, i| {
        const entity: []const u8 = switch (byte) {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            else => continue,
        };
        try writer.writeAll(text[run_start..i]);
        try writer.writeAll(entity);
        run_start = i + 1;
    }
    try writer.writeAll(text[run_start..]);
}

/// Reads Markdown from stdin (at most 10241024 bytes) and writes HTML to stdout.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = &gpa.allocator;

    const input = try std.io.getStdIn().readToEndAlloc(allocator, 10241024);
    defer allocator.free(input);
    const stdout = std.io.getStdOut().writer();

    const parsed = try parse(allocator, input);
    defer parsed.deinit();

    // NOTE(review): the HTML tags in these strings were missing from the
    // recovered source and have been reconstructed; confirm against the
    // original commit.
    for (parsed.items) |block| {
        switch (block) {
            .paragraph => |p| try stdout.print("<p>{s}</p>", .{p.text}),
            .heading => |h| try stdout.print(
                "<h{0d}>{1s}</h{0d}>",
                .{ @enumToInt(h.level), h.text },
            ),
            .code_block => |c| {
                // Code is literal text, so markup characters must be escaped.
                try stdout.writeAll("<pre><code>");
                try writeHtmlEscaped(stdout, c.text);
                try stdout.writeAll("</code></pre>");
            },
        }
    }
}

test "parse headings and paragraph" {
    const allocator = std.testing.allocator;
    const input =
        \\# Hola
        \\## Hola
        \\
        \\### Heey
        \\
        \\:)
        \\## Hola mundo
    ;

    const parsed = try parse(allocator, input);
    defer parsed.deinit();

    try std.testing.expectEqual(@as(usize, 5), parsed.items.len);

    try std.testing.expectEqualStrings("heading", @tagName(parsed.items[0]));
    try std.testing.expect(parsed.items[0].heading.level == .one);
    try std.testing.expectEqualStrings("Hola", parsed.items[0].heading.text);

    try std.testing.expectEqualStrings("heading", @tagName(parsed.items[1]));
    try std.testing.expect(parsed.items[1].heading.level == .two);
    try std.testing.expectEqualStrings("Hola", parsed.items[1].heading.text);

    try std.testing.expectEqualStrings("heading", @tagName(parsed.items[2]));
    try std.testing.expect(parsed.items[2].heading.level == .three);
    try std.testing.expectEqualStrings("Heey", parsed.items[2].heading.text);

    try std.testing.expectEqualStrings("paragraph", @tagName(parsed.items[3]));
    try std.testing.expectEqualStrings(":)", parsed.items[3].paragraph.text);

    try std.testing.expectEqualStrings("heading", @tagName(parsed.items[4]));
    try std.testing.expectEqualStrings("Hola mundo", parsed.items[4].heading.text);
}

test "opening fence without a newline is not a code block" {
    const parsed = try parse(std.testing.allocator, "```");
    defer parsed.deinit();

    try std.testing.expectEqual(@as(usize, 1), parsed.items.len);
    try std.testing.expectEqualStrings("```", parsed.items[0].paragraph.text);
}

test "hash in the middle of a line does not start a heading" {
    const parsed = try parse(std.testing.allocator, "C# is nice");
    defer parsed.deinit();

    try std.testing.expectEqual(@as(usize, 1), parsed.items.len);
    try std.testing.expectEqualStrings("C# is nice", parsed.items[0].paragraph.text);
}

test "writeHtmlEscaped escapes markup characters" {
    var buffer: [64]u8 = undefined;
    var stream = std.io.fixedBufferStream(&buffer);
    try writeHtmlEscaped(stream.writer(), "a < b && c > d");
    try std.testing.expectEqualStrings("a &lt; b &amp;&amp; c &gt; d", stream.getWritten());
}