From ce0c1ae97c2a899f74cfcf69385ff3972c699ba8 Mon Sep 17 00:00:00 2001 From: torque Date: Thu, 7 Sep 2023 23:07:32 -0700 Subject: [PATCH] it's alive Okay, this works. The allocation situation is still kind of a mess, but I think I have enough escape hatches for usability's sake. Parsing from a file is left as an exercise for the user (it's not hard). The node manipulation routines will probably be added on demand. Or maybe I will get a wild hair and just do it at some point. --- src/cmark.zig | 166 +++++++++++++++++++++++++------------------------- 1 file changed, 84 insertions(+), 82 deletions(-) diff --git a/src/cmark.zig b/src/cmark.zig index 45c1c37..08e45d2 100644 --- a/src/cmark.zig +++ b/src/cmark.zig @@ -60,7 +60,14 @@ pub const ParseOptions = packed struct(u32) { _padding: u21 = 0, // skip indices 11-31 }; -pub const RenderOptions = packed struct(u32) { +pub const RenderFormat = enum { xml, html, man, commonmark, latex }; + +pub const RenderOptions = struct { + flags: RenderFlags = .{}, + width: c_int = 0, +}; + +pub const RenderFlags = packed struct(u32) { _skip_0: bool = false, // for some reason 1 << 0 is skipped (oversight?) 
include_sourcepos: bool = false, // index 1 @@ -78,10 +85,10 @@ pub const RenderOptions = packed struct(u32) { }; comptime { - std.debug.assert(@as(u32, @bitCast(RenderOptions{ .include_sourcepos = true })) == cmark.CMARK_OPT_SOURCEPOS); - std.debug.assert(@as(u32, @bitCast(RenderOptions{ .softbreaks_as_hardbreaks = true })) == cmark.CMARK_OPT_HARDBREAKS); - std.debug.assert(@as(u32, @bitCast(RenderOptions{ .softbreaks_as_spaces = true })) == cmark.CMARK_OPT_NOBREAKS); - std.debug.assert(@as(u32, @bitCast(RenderOptions{ .allow_unsafe_html = true })) == cmark.CMARK_OPT_UNSAFE); + std.debug.assert(@as(u32, @bitCast(RenderFlags{ .include_sourcepos = true })) == cmark.CMARK_OPT_SOURCEPOS); + std.debug.assert(@as(u32, @bitCast(RenderFlags{ .softbreaks_as_hardbreaks = true })) == cmark.CMARK_OPT_HARDBREAKS); + std.debug.assert(@as(u32, @bitCast(RenderFlags{ .softbreaks_as_spaces = true })) == cmark.CMARK_OPT_NOBREAKS); + std.debug.assert(@as(u32, @bitCast(RenderFlags{ .allow_unsafe_html = true })) == cmark.CMARK_OPT_UNSAFE); std.debug.assert(@as(u32, @bitCast(ParseOptions{ .validate_utf8 = true })) == cmark.CMARK_OPT_VALIDATE_UTF8); std.debug.assert(@as(u32, @bitCast(ParseOptions{ .smart_quotes_and_dashes = true })) == cmark.CMARK_OPT_SMART); } @@ -145,7 +152,7 @@ fn cmarkFree(ctx: ?*anyopaque, mem: ?*anyopaque) callconv(.C) void { allocator.free(raw_mem); } -fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem { +pub fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem { return .{ .ctx = @constCast(allocator), .calloc = cmarkCalloc, @@ -224,72 +231,21 @@ const CmarkNode = union(enum) { } } - pub fn render_html(self: CmarkNode, options: RenderOptions) ![:0]const u8 { - switch (self) { - inline else => |node| { - const result: [*:0]const u8 = cmark.cmark_render_html( - @ptrCast(node), - @bitCast(options), - ) orelse return error.Failed; + pub fn render(self: CmarkNode, format: RenderFormat, options: RenderOptions) ![:0]const u8 
{ + const unwrapped: *cmark.cmark_node = switch (self) { + inline else => |node| @ptrCast(node), + }; + const flags: c_int = @bitCast(options.flags); - return std.mem.sliceTo(result, 0); - }, - } - } + const result: [*:0]const u8 = switch (format) { + .xml => cmark.cmark_render_xml(unwrapped, flags) orelse return error.Failed, + .html => cmark.cmark_render_html(unwrapped, flags) orelse return error.Failed, + .man => cmark.cmark_render_man(unwrapped, flags, options.width) orelse return error.Failed, + .commonmark => cmark.cmark_render_commonmark(unwrapped, flags, options.width) orelse return error.Failed, + .latex => cmark.cmark_render_latex(unwrapped, flags, options.width) orelse return error.Failed, + }; - pub fn render_xml(self: CmarkNode, options: RenderOptions) ![:0]const u8 { - switch (self) { - inline else => |node| { - const result: [*:0]const u8 = cmark.cmark_render_xml( - @ptrCast(node), - @bitCast(options), - ) orelse return error.Failed; - - return std.mem.sliceTo(result, 0); - }, - } - } - - pub fn render_man(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 { - switch (self) { - inline else => |node| { - const result: [*:0]const u8 = cmark.cmark_render_man( - @ptrCast(node), - @bitCast(options), - width, - ) orelse return error.Failed; - - return std.mem.sliceTo(result, 0); - }, - } - } - - pub fn render_commonmark(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 { - switch (self) { - inline else => |node| { - const result: [*:0]const u8 = cmark.cmark_render_commonmark( - @ptrCast(node), - @bitCast(options), - width, - ) orelse return error.Failed; - - return std.mem.sliceTo(result, 0); - }, - } - } - - pub fn render_latex(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 { - switch (self) { - inline else => |node| { - const result: [*:0]const u8 = cmark.cmark_render_latex( - @ptrCast(node), - @bitCast(options), - width, - ) orelse return error.Failed; - - return std.mem.sliceTo(result, 0); - }, - } + 
return std.mem.sliceTo(result, 0); } pub fn unlink(self: CmarkNode) void { @@ -366,6 +322,7 @@ const CmarkNode = union(enum) { done = cmark.CMARK_EVENT_DONE, enter = cmark.CMARK_EVENT_ENTER, exit = cmark.CMARK_EVENT_EXIT, + visit, // a new event we introduce for nodes that will never have `exit` called on them to simplify consumer logic }; pub const NodeVisit = struct { @@ -381,13 +338,23 @@ const CmarkNode = union(enum) { const event: Event = @enumFromInt(cmark.cmark_iter_next(@ptrCast(self))); switch (event) { .done => return null, - .none => @panic("whoah nelly"), - else => |entex| { - return .{ - .event = entex, - .node = CmarkNode.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable, + .enter, .exit => |evt| { + const node = CmarkNode.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable; + const entex: Event = switch (node) { + .html_block, + .thematic_break, + .code_block, + .text, + .softbreak, + .linebreak, + .code, + .html_inline, + => .visit, + else => evt, }; + return .{ .event = entex, .node = node }; }, + .none, .visit => unreachable, } } @@ -534,6 +501,13 @@ pub const Parser = struct { return self; } + pub fn initWithWrappedAllocator(mem: *cmark.cmark_mem, options: ParseOptions) !Parser { + return cmark.cmark_parser_new_with_mem( + @bitCast(options), + mem, + ) orelse error.OutOfMemory; + } + pub fn feed(self: Parser, buffer: []const u8) void { cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len); } @@ -546,28 +520,56 @@ pub const Parser = struct { } pub fn deinit(self: Parser) void { - cmark.cmark_parser_free(self._parser); + self.deinitParser(); self.allocator.destroy(self._cmark_mem); } + + pub fn deinitParser(self: Parser) void { + cmark.cmark_parser_free(self._parser); + } }; // the nodes hang on to a reference to the allocator, which does not play nicely at all // with our allocator wrapping strategy. Basically, the parser has to live through // node rendering. 
Due to this, it probably makes sense to keep a hard association // between the parser and the node tree. -pub fn parse(allocator: std.mem.Allocator, buffer: []const u8, options: ParseOptions) !CmarkNode { - const parser = try Parser.init(&allocator, options); - defer parser.deinit(); +pub fn parse(allocator: *std.mem.Allocator, buffer: []const u8, options: ParseOptions) !CmarkNode { + const parser = try Parser.init(allocator, options); + defer parser.deinitParser(); parser.feed(buffer); - return try parser.finish; + return try parser.finish(); } // pub fn parseFile(allocator: std.mem.Allocator, path: []const u8, options: ParseOptions) !CmarkNode -pub fn main() void { +pub fn main() !void { const a = std.heap.page_allocator; - const parser = Parser.init(&a, .{}) catch @panic("noop"); + const parser = try Parser.init(&a, .{}); defer parser.deinit(); + + parser.feed( + \\###### Test + \\ + \\This is a test of *commonmark* **parsing** + \\ + \\--- + \\ + \\ * `good` + \\ * [bye](bye) + \\ + ); + + const node = try parser.finish(); + defer node.deinit(); + + const iterator = try node.iterator(); + defer iterator.deinit(); + + while (iterator.next()) |visit| { + std.debug.print("{s} {s}\n", .{ @tagName(visit.event), @tagName(visit.node) }); + } + + std.debug.print("{s}\n", .{try node.render(.html, .{})}); }