kick out the jams

Make the module act more like a module. The test code is now an
example. Get rid of redundant "Cmark" prefix in many types. Implement
most of the missing node routines.
This commit is contained in:
torque 2023-09-09 23:45:22 -07:00
parent ce0c1ae97c
commit 3a73fbe312
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
3 changed files with 253 additions and 107 deletions

View File

@ -13,18 +13,48 @@ pub fn build(b: *std.Build) void {
}); });
const cmark_c = cmark_build.cmark_lib(b, .{ const cmark_c = cmark_build.cmark_lib(b, .{
.name = "cmark-c",
.target = target, .target = target,
.optimize = optimize, .optimize = optimize,
}); });
_ = cmark; add_examples(b, .{
.target = target,
const cmarktest = b.addExecutable(.{ .cmark_module = cmark,
.name = "cmtest", .cmark_c = cmark_c,
.root_source_file = .{ .path = "src/cmark.zig" },
}); });
}
cmarktest.linkLibrary(cmark_c);
const ExampleOptions = struct {
b.installArtifact(cmarktest); target: std.zig.CrossTarget,
cmark_module: *std.Build.Module,
cmark_c: *std.Build.Step.Compile,
};
/// Describes one example program that `add_examples` turns into an executable.
const Example = struct {
/// Name given to the executable artifact.
name: []const u8,
/// Path used as the executable's root source file.
file: []const u8,
};
/// Table of example programs iterated by `add_examples`; add a new entry here
/// to have another example built under the `examples` step.
const examples = [_]Example{
.{ .name = "render_html", .file = "examples/render_html.zig" },
};
/// Registers an `examples` build step that builds and installs every entry
/// of the `examples` table.
///
/// Each example is compiled as a Debug executable for `options.target`, has
/// `options.cmark_module` added under the import name "cmark", and is linked
/// against the `options.cmark_c` library artifact.
pub fn add_examples(b: *std.Build, options: ExampleOptions) void {
    const example_step = b.step("examples", "build examples");

    // The table is an ordinary constant array, so a plain runtime loop is enough.
    for (examples) |example| {
        const ex_exe = b.addExecutable(.{
            .name = example.name,
            .root_source_file = .{ .path = example.file },
            .target = options.target,
            .optimize = .Debug,
        });
        ex_exe.addModule("cmark", options.cmark_module);
        ex_exe.linkLibrary(options.cmark_c);

        // Installing the artifact is what the `examples` step actually depends on.
        const install = b.addInstallArtifact(ex_exe, .{});
        example_step.dependOn(&install.step);
    }
}

42
examples/render_html.zig Normal file
View File

@ -0,0 +1,42 @@
const std = @import("std");
const cmark = @import("cmark");
/// Example entry point: parses a small CommonMark sample, prints every
/// iterator visit as "<event> <node kind>", then prints the HTML rendering.
pub fn main() !void {
    const allocator = std.heap.page_allocator;

    const parser = try cmark.Parser.init(&allocator, .{});
    defer parser.deinit();

    parser.feed(
        \\##### Test
        \\
        \\This is a test of *commonmark* **parsing**
        \\
        \\-----
        \\
        \\ * `good`
        \\ * [bye][@@@]
        \\
        \\```
        \\farewell
        \\```
        \\
        \\[@@@]: greetings (
        \\ this is a long url title where I can put whatever I want on and on
        \\ even over many lines
        \\)
        \\
    );

    const document = try parser.finish();
    defer document.deinit();

    const walker = try document.iterator();
    defer walker.deinit();

    // Dump the traversal: one line per visit with the event and node tag names.
    while (walker.next()) |visit| {
        const event_name = @tagName(visit.event);
        const node_name = @tagName(visit.node);
        std.debug.print("{s} {s}\n", .{ event_name, node_name });
    }

    const html = try document.render(.html, .{});
    std.debug.print("{s}\n", .{html});
}

View File

@ -152,7 +152,7 @@ fn cmarkFree(ctx: ?*anyopaque, mem: ?*anyopaque) callconv(.C) void {
allocator.free(raw_mem); allocator.free(raw_mem);
} }
pub fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem { pub fn wrapAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem {
return .{ return .{
.ctx = @constCast(allocator), .ctx = @constCast(allocator),
.calloc = cmarkCalloc, .calloc = cmarkCalloc,
@ -161,36 +161,36 @@ pub fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem {
}; };
} }
const CmarkNode = union(enum) { const Node = union(enum) {
document: *CmarkOpaqueNode, document: *OpaqueNode,
heading: *CmarkHeadingNode, heading: *HeadingNode,
block_quote: *CmarkOpaqueNode, block_quote: *OpaqueNode,
bullet_list: *CmarkOpaqueNode, bullet_list: *OpaqueNode,
ordered_list: *CmarkOrderedListNode, ordered_list: *OrderedListNode,
item: *CmarkOpaqueNode, item: *OpaqueNode,
code_block: *CmarkOpaqueNode, code_block: *OpaqueNode,
html_block: *CmarkOpaqueNode, html_block: *BlockNode,
custom_block: *CmarkOpaqueNode, custom_block: *CustomNode,
thematic_break: *CmarkOpaqueNode, thematic_break: *OpaqueNode,
paragraph: *CmarkOpaqueNode, paragraph: *OpaqueNode,
text: *CmarkOpaqueNode, text: *BlockNode,
softbreak: *CmarkOpaqueNode, softbreak: *OpaqueNode,
linebreak: *CmarkOpaqueNode, linebreak: *OpaqueNode,
html_inline: *CmarkOpaqueNode, html_inline: *BlockNode,
custom_inline: *CmarkOpaqueNode, custom_inline: *CustomNode,
code: *CmarkOpaqueNode, code: *BlockNode,
emph: *CmarkOpaqueNode, emph: *OpaqueNode,
strong: *CmarkOpaqueNode, strong: *OpaqueNode,
link: *CmarkOpaqueNode, link: *LinkNode,
image: *CmarkOpaqueNode, image: *LinkNode,
fn fromCNode(c_node: ?*cmark.cmark_node) !CmarkNode { fn fromCNode(c_node: ?*cmark.cmark_node) !Node {
const node = c_node orelse return error.Failed; const node = c_node orelse return error.Failed;
switch (@as(NodeType, @enumFromInt(cmark.cmark_node_get_type(@ptrCast(node))))) { switch (@as(NodeType, @enumFromInt(cmark.cmark_node_get_type(@ptrCast(node))))) {
@ -223,7 +223,7 @@ const CmarkNode = union(enum) {
} }
} }
pub fn deinit(self: CmarkNode) void { pub fn deinit(self: Node) void {
switch (self) { switch (self) {
inline else => |node| { inline else => |node| {
cmark.cmark_node_free(@ptrCast(node)); cmark.cmark_node_free(@ptrCast(node));
@ -231,7 +231,7 @@ const CmarkNode = union(enum) {
} }
} }
pub fn render(self: CmarkNode, format: RenderFormat, options: RenderOptions) ![:0]const u8 { pub fn render(self: Node, format: RenderFormat, options: RenderOptions) ![:0]const u8 {
const unwrapped: *cmark.cmark_node = switch (self) { const unwrapped: *cmark.cmark_node = switch (self) {
inline else => |node| @ptrCast(node), inline else => |node| @ptrCast(node),
}; };
@ -248,7 +248,7 @@ const CmarkNode = union(enum) {
return std.mem.sliceTo(result, 0); return std.mem.sliceTo(result, 0);
} }
pub fn unlink(self: CmarkNode) void { pub fn unlink(self: Node) void {
switch (self) { switch (self) {
inline else => |node| { inline else => |node| {
cmark.cmark_unlink_node(@ptrCast(node)); cmark.cmark_unlink_node(@ptrCast(node));
@ -257,7 +257,7 @@ const CmarkNode = union(enum) {
} }
// inserts self before sibling // inserts self before sibling
pub fn insertBefore(self: CmarkNode, sibling: CmarkNode) !void { pub fn insertBefore(self: Node, sibling: Node) !void {
switch (self) { switch (self) {
inline else => |node| switch (sibling) { inline else => |node| switch (sibling) {
inline else => |sib_node| { inline else => |sib_node| {
@ -270,7 +270,7 @@ const CmarkNode = union(enum) {
} }
// inserts self after sibling // inserts self after sibling
pub fn insertAfter(self: CmarkNode, sibling: CmarkNode) !void { pub fn insertAfter(self: Node, sibling: Node) !void {
switch (self) { switch (self) {
inline else => |node| switch (sibling) { inline else => |node| switch (sibling) {
inline else => |sib_node| { inline else => |sib_node| {
@ -283,7 +283,7 @@ const CmarkNode = union(enum) {
} }
// replace self with new. Does not free self. // replace self with new. Does not free self.
pub fn replaceWith(self: CmarkNode, new: CmarkNode) !void { pub fn replaceWith(self: Node, new: Node) !void {
switch (self) { switch (self) {
inline else => |node| switch (new) { inline else => |node| switch (new) {
inline else => |new_node| { inline else => |new_node| {
@ -294,7 +294,7 @@ const CmarkNode = union(enum) {
} }
} }
pub fn prependChild(self: CmarkNode, child: CmarkNode) !void { pub fn prependChild(self: Node, child: Node) !void {
switch (self) { switch (self) {
inline else => |node| switch (child) { inline else => |node| switch (child) {
inline else => |child_node| { inline else => |child_node| {
@ -305,7 +305,7 @@ const CmarkNode = union(enum) {
} }
} }
pub fn appendChild(self: CmarkNode, child: CmarkNode) !void { pub fn appendChild(self: Node, child: Node) !void {
switch (self) { switch (self) {
inline else => |node| switch (child) { inline else => |node| switch (child) {
inline else => |child_node| { inline else => |child_node| {
@ -316,6 +316,30 @@ const CmarkNode = union(enum) {
} }
} }
/// Returns the start line of the wrapped node by forwarding to the active
/// variant's own `getStartLine`.
pub fn getStartLine(self: Node) c_int {
    switch (self) {
        inline else => |inner| return inner.getStartLine(),
    }
}
/// Returns the start column of the wrapped node by forwarding to the active
/// variant's own `getStartColumn`.
pub fn getStartColumn(self: Node) c_int {
    switch (self) {
        inline else => |inner| return inner.getStartColumn(),
    }
}
/// Returns the end line of the wrapped node by forwarding to the active
/// variant's own `getEndLine`.
pub fn getEndLine(self: Node) c_int {
    switch (self) {
        inline else => |inner| return inner.getEndLine(),
    }
}
/// Returns the end column of the wrapped node by forwarding to the active
/// variant's own `getEndColumn`.
pub fn getEndColumn(self: Node) c_int {
    switch (self) {
        inline else => |inner| return inner.getEndColumn(),
    }
}
pub const NodeIterator = opaque { pub const NodeIterator = opaque {
pub const Event = enum(c_int) { pub const Event = enum(c_int) {
none = cmark.CMARK_EVENT_NONE, none = cmark.CMARK_EVENT_NONE,
@ -327,7 +351,7 @@ const CmarkNode = union(enum) {
pub const NodeVisit = struct { pub const NodeVisit = struct {
event: Event, event: Event,
node: CmarkNode, node: Node,
}; };
pub fn deinit(self: *NodeIterator) void { pub fn deinit(self: *NodeIterator) void {
@ -339,7 +363,7 @@ const CmarkNode = union(enum) {
switch (event) { switch (event) {
.done => return null, .done => return null,
.enter, .exit => |evt| { .enter, .exit => |evt| {
const node = CmarkNode.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable; const node = Node.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable;
const entex: Event = switch (node) { const entex: Event = switch (node) {
.html_block, .html_block,
.thematic_break, .thematic_break,
@ -370,12 +394,12 @@ const CmarkNode = union(enum) {
} }
} }
pub fn root(self: *NodeIterator) CmarkNode { pub fn root(self: *NodeIterator) Node {
return CmarkNode.fromCNode(cmark.cmark_iter_get_root(@ptrCast(self))) catch unreachable; return Node.fromCNode(cmark.cmark_iter_get_root(@ptrCast(self))) catch unreachable;
} }
}; };
pub fn iterator(self: CmarkNode) !*NodeIterator { pub fn iterator(self: Node) !*NodeIterator {
switch (self) { switch (self) {
inline else => |node| { inline else => |node| {
const iter: *cmark.cmark_iter = cmark.cmark_iter_new(@ptrCast(node)) orelse const iter: *cmark.cmark_iter = cmark.cmark_iter_new(@ptrCast(node)) orelse
@ -386,7 +410,7 @@ const CmarkNode = union(enum) {
} }
}; };
pub fn CmarkNodeCommon(comptime Self: type) type { pub fn NodeCommon(comptime Self: type) type {
return struct { return struct {
pub fn getUserData(self: *Self) ?*anyopaque { pub fn getUserData(self: *Self) ?*anyopaque {
return @ptrCast(cmark.cmark_node_get_user_data(@ptrCast(self))); return @ptrCast(cmark.cmark_node_get_user_data(@ptrCast(self)));
@ -395,10 +419,26 @@ pub fn CmarkNodeCommon(comptime Self: type) type {
pub fn setUserData(self: *Self, user_data: ?*anyopaque) bool { pub fn setUserData(self: *Self, user_data: ?*anyopaque) bool {
return cmark.cmark_node_set_user_data(@ptrCast(self), user_data) == 1; return cmark.cmark_node_set_user_data(@ptrCast(self), user_data) == 1;
} }
/// Returns whatever `cmark_node_get_start_line` reports for this node.
pub fn getStartLine(self: *Self) c_int {
    const raw: *cmark.cmark_node = @ptrCast(self);
    return cmark.cmark_node_get_start_line(raw);
}
pub fn getStartColumn(self: *Self) c_int {
    return cmark.cmark_node_get_start_column(@ptrCast(self));
}
}
pub fn getEndLine(self: *Self) c_int {
return cmark.cmark_node_get_end_line(@ptrCast(self));
}
pub fn getEndColumn(self: *Self) c_int {
return cmark.cmark_node_get_end_column(@ptrCast(self));
}
}; };
} }
pub fn CmarkBlockNodeContents(comptime Self: type) type { pub fn BlockNodeCommon(comptime Self: type) type {
return struct { return struct {
pub fn getContent(self: *Self) [:0]const u8 { pub fn getContent(self: *Self) [:0]const u8 {
return cmark.cmark_node_get_literal(@ptrCast(self)); return cmark.cmark_node_get_literal(@ptrCast(self));
@ -410,68 +450,130 @@ pub fn CmarkBlockNodeContents(comptime Self: type) type {
}; };
} }
pub const CmarkOpaqueNode = opaque { pub const OpaqueNode = opaque {
pub usingnamespace CmarkNodeCommon(@This()); pub usingnamespace NodeCommon(@This());
}; };
// Opaque handle for heading nodes; adds heading-level accessors on top of the
// shared NodeCommon helpers.
pub const CmarkHeadingNode = opaque { pub const HeadingNode = opaque {
pub fn getLevel(self: *CmarkHeadingNode) i3 { pub usingnamespace NodeCommon(@This());
// Returns the heading level reported by cmark_node_get_heading_level.
// NOTE(review): i3 only holds -4..3, while CommonMark heading levels run 1..6
// (the render_html example feeds a level-5 heading). The @intCast below will
// trap in safe builds for levels 4-6; the return type likely needs to be u3 or
// wider -- confirm and widen together with setLevel.
pub fn getLevel(self: *HeadingNode) i3 {
return @intCast(cmark.cmark_node_get_heading_level(@ptrCast(self))); return @intCast(cmark.cmark_node_get_heading_level(@ptrCast(self)));
} }
// Sets the heading level; returns error.Failed when cmark does not report 1.
// NOTE(review): with an i3 parameter, levels 4-6 cannot be requested at all.
pub fn setLevel(self: *CmarkHeadingNode, level: i3) !void { pub fn setLevel(self: *HeadingNode, level: i3) !void {
if (cmark.cmark_node_set_heading_level(@ptrCast(self), level) != 1) if (cmark.cmark_node_set_heading_level(@ptrCast(self), level) != 1)
return error.Failed; return error.Failed;
} }
pub usingnamespace CmarkNodeCommon(@This());
}; };
/// Opaque handle for ordered list nodes; wraps cmark's list delimiter, start
/// number and tightness accessors on top of the shared `NodeCommon` helpers.
pub const OrderedListNode = opaque {
    pub usingnamespace NodeCommon(@This());

    /// Returns the delimiter reported by `cmark_node_get_list_delim`.
    /// The misspelled name is kept for existing callers; see `getDelimiter`.
    pub fn getDelimeter(self: *OrderedListNode) DelimType {
        return @enumFromInt(cmark.cmark_node_get_list_delim(@ptrCast(self)));
    }

    /// Correctly spelled alias of `getDelimeter`, matching `setDelimiter`.
    pub const getDelimiter = getDelimeter;

    /// Sets the list delimiter; returns `error.Failed` unless cmark reports 1.
    pub fn setDelimiter(self: *OrderedListNode, new: DelimType) !void {
        if (cmark.cmark_node_set_list_delim(@ptrCast(self), @intFromEnum(new)) != 1)
            return error.Failed;
    }

    /// Returns the starting number reported by `cmark_node_get_list_start`.
    pub fn getStart(self: *OrderedListNode) i32 {
        return @intCast(cmark.cmark_node_get_list_start(@ptrCast(self)));
    }

    /// Sets the starting number; returns `error.Failed` unless cmark reports 1.
    pub fn setStart(self: *OrderedListNode, start: i32) !void {
        // Must call the setter: the getter takes a single argument and never mutates.
        if (cmark.cmark_node_set_list_start(@ptrCast(self), @intCast(start)) != 1)
            return error.Failed;
    }

    /// Returns true when `cmark_node_get_list_tight` reports 1.
    pub fn getTight(self: *OrderedListNode) bool {
        return cmark.cmark_node_get_list_tight(@ptrCast(self)) == 1;
    }

    /// Sets list tightness; returns `error.Failed` unless cmark reports 1.
    pub fn setTight(self: *OrderedListNode, tight: bool) !void {
        if (cmark.cmark_node_set_list_tight(@ptrCast(self), @intFromBool(tight)) != 1)
            return error.Failed;
    }
};
/// Opaque handle for code block nodes; adds fence-info accessors on top of
/// the shared `NodeCommon` and `BlockNodeCommon` helpers.
pub const CodeBlockNode = opaque {
    pub usingnamespace NodeCommon(@This());
    pub usingnamespace BlockNodeCommon(@This());

    /// Returns the fence info string as a sentinel-terminated slice.
    pub fn getFenceInfo(self: *CodeBlockNode) [:0]const u8 {
        // cmark returns an empty string if there is no fence, so a null
        // result is treated as impossible for a code block node.
        const str: [*:0]const u8 = cmark.cmark_node_get_fence_info(@ptrCast(self)) orelse unreachable;
        return std.mem.sliceTo(str, 0);
    }

    /// Sets the fence info string; returns `error.Failed` unless cmark reports 1.
    pub fn setFenceInfo(self: *CodeBlockNode, info: [:0]const u8) !void {
        if (cmark.cmark_node_set_fence_info(@ptrCast(self), info.ptr) != 1)
            return error.Failed;
    }
};
pub usingnamespace CmarkNodeCommon(@This()); pub const BlockNode = opaque {
pub usingnamespace NodeCommon(@This());
pub usingnamespace BlockNodeCommon(@This());
};
/// Opaque handle used for link and image nodes; exposes the URL and title
/// accessors on top of the shared `NodeCommon` helpers.
pub const LinkNode = opaque {
    pub usingnamespace NodeCommon(@This());

    /// Returns the node's URL as a sentinel-terminated slice.
    pub fn getUrl(self: *LinkNode) [:0]const u8 {
        // cmark hands back an empty string when no URL is set, so null is
        // treated as impossible here.
        const raw_url: [*:0]const u8 = cmark.cmark_node_get_url(@ptrCast(self)) orelse unreachable;
        return std.mem.sliceTo(raw_url, 0);
    }

    /// Sets the node's URL; returns `error.Failed` unless cmark reports 1.
    pub fn setUrl(self: *LinkNode, new: [:0]const u8) !void {
        const status = cmark.cmark_node_set_url(@ptrCast(self), new.ptr);
        if (status != 1) return error.Failed;
    }

    /// Returns the node's title as a sentinel-terminated slice.
    pub fn getTitle(self: *LinkNode) [:0]const u8 {
        // cmark hands back an empty string when no title is set, so null is
        // treated as impossible here.
        const raw_title: [*:0]const u8 = cmark.cmark_node_get_title(@ptrCast(self)) orelse unreachable;
        return std.mem.sliceTo(raw_title, 0);
    }

    /// Sets the node's title; returns `error.Failed` unless cmark reports 1.
    pub fn setTitle(self: *LinkNode, new: [:0]const u8) !void {
        const status = cmark.cmark_node_set_title(@ptrCast(self), new.ptr);
        if (status != 1) return error.Failed;
    }
};
/// Opaque handle used for custom block and custom inline nodes; exposes the
/// on-enter / on-exit text accessors on top of the shared NodeCommon helpers.
pub const CustomNode = opaque {
pub usingnamespace NodeCommon(@This());
/// Returns the node's on-enter text as a sentinel-terminated slice.
pub fn getOnEnter(self: *CustomNode) [:0]const u8 {
// cmark returns an empty string if no on_enter text is set
const str: [*:0]const u8 = cmark.cmark_node_get_on_enter(@ptrCast(self)) orelse unreachable;
return std.mem.sliceTo(str, 0);
}
/// Sets the on-enter text; returns error.Failed unless cmark reports 1.
pub fn setOnEnter(self: *CustomNode, new: [:0]const u8) !void {
if (cmark.cmark_node_set_on_enter(@ptrCast(self), new.ptr) != 1)
return error.Failed;
}
/// Returns the node's on-exit text as a sentinel-terminated slice.
pub fn getOnExit(self: *CustomNode) [:0]const u8 {
// cmark returns an empty string if no on_exit text is set
const str: [*:0]const u8 = cmark.cmark_node_get_on_exit(@ptrCast(self)) orelse unreachable;
return std.mem.sliceTo(str, 0);
}
/// Sets the on-exit text; returns error.Failed unless cmark reports 1.
pub fn setOnExit(self: *CustomNode, new: [:0]const u8) !void {
if (cmark.cmark_node_set_on_exit(@ptrCast(self), new.ptr) != 1)
return error.Failed;
}
}; };
pub const Parser = struct { pub const Parser = struct {
@ -491,7 +593,7 @@ pub const Parser = struct {
// this has to be heap allocated because otherwise the cmark internal object // this has to be heap allocated because otherwise the cmark internal object
// ends up holding a reference to a stack copy that dies with this function. // ends up holding a reference to a stack copy that dies with this function.
self._cmark_mem = try allocator.create(cmark.cmark_mem); self._cmark_mem = try allocator.create(cmark.cmark_mem);
self._cmark_mem.* = wrapCmarkAllocator(self.allocator); self._cmark_mem.* = wrapAllocator(self.allocator);
self._parser = cmark.cmark_parser_new_with_mem( self._parser = cmark.cmark_parser_new_with_mem(
@bitCast(options), @bitCast(options),
@ -512,8 +614,8 @@ pub const Parser = struct {
cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len); cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len);
} }
pub fn finish(self: Parser) !CmarkNode { pub fn finish(self: Parser) !Node {
return CmarkNode.fromCNode( return Node.fromCNode(
cmark.cmark_parser_finish(self._parser) orelse cmark.cmark_parser_finish(self._parser) orelse
return error.InvalidDocument, return error.InvalidDocument,
); );
@ -530,10 +632,12 @@ pub const Parser = struct {
}; };
// the nodes hang on to a reference to the allocator, which does not play nicely at all // the nodes hang on to a reference to the allocator, which does not play nicely at all
// with our allocator wrapping strategy. Basically, the parser has to live through // with our allocator wrapping strategy. Basically, the allocator has to live through
// node rendering. Due to this, it probably makes sense to keep a hard association // node rendering. Due to this, it probably makes sense to keep a hard association
// between the parser and the node tree. // between the parser and the node tree (i.e. expose the node iterator and the render
pub fn parse(allocator: *std.mem.Allocator, buffer: []const u8, options: ParseOptions) !CmarkNode { // method on Parser). This forces more obvious coupling at the cost of in some ways a
// less elegant API.
pub fn parse(allocator: *std.mem.Allocator, buffer: []const u8, options: ParseOptions) !Node {
const parser = try Parser.init(allocator, options); const parser = try Parser.init(allocator, options);
defer parser.deinitParser(); defer parser.deinitParser();
@ -542,34 +646,4 @@ pub fn parse(allocator: *std.mem.Allocator, buffer: []const u8, options: ParseOp
return try parser.finish(); return try parser.finish();
} }
// pub fn parseFile(allocator: std.mem.Allocator, path: []const u8, options: ParseOptions) !CmarkNode // pub fn parseFile(allocator: std.mem.Allocator, path: []const u8, options: ParseOptions) !Node
pub fn main() !void {
const a = std.heap.page_allocator;
const parser = try Parser.init(&a, .{});
defer parser.deinit();
parser.feed(
\\###### Test
\\
\\This is a test of *commonmark* **parsing**
\\
\\---
\\
\\ * `good`
\\ * [bye](bye)
\\
);
const node = try parser.finish();
defer node.deinit();
const iterator = try node.iterator();
defer iterator.deinit();
while (iterator.next()) |visit| {
std.debug.print("{s} {s}\n", .{ @tagName(visit.event), @tagName(visit.node) });
}
std.debug.print("{s}\n", .{try node.render(.html, .{})});
}