it's alive
Okay, this works. The allocation situation is still kind of a mess, but I think I have enough escape hatches for usability's sake. Parsing from a file is left as an exercise for the user (it's not hard). The node manipulation routines will probably be added on demand. Or maybe I will get a wild hair and just do it at some point.
This commit is contained in:
parent
933f354077
commit
ce0c1ae97c
166
src/cmark.zig
166
src/cmark.zig
@ -60,7 +60,14 @@ pub const ParseOptions = packed struct(u32) {
|
|||||||
_padding: u21 = 0, // skip indices 11-31
|
_padding: u21 = 0, // skip indices 11-31
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const RenderOptions = packed struct(u32) {
|
pub const RenderFormat = enum { xml, html, man, commonmark, latex };
|
||||||
|
|
||||||
|
pub const RenderOptions = struct {
|
||||||
|
flags: RenderFlags = .{},
|
||||||
|
width: c_int = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub const RenderFlags = packed struct(u32) {
|
||||||
_skip_0: bool = false, // for some reason 1 << 0 is skipped (oversight?)
|
_skip_0: bool = false, // for some reason 1 << 0 is skipped (oversight?)
|
||||||
|
|
||||||
include_sourcepos: bool = false, // index 1
|
include_sourcepos: bool = false, // index 1
|
||||||
@ -78,10 +85,10 @@ pub const RenderOptions = packed struct(u32) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
comptime {
|
comptime {
|
||||||
std.debug.assert(@as(u32, @bitCast(RenderOptions{ .include_sourcepos = true })) == cmark.CMARK_OPT_SOURCEPOS);
|
std.debug.assert(@as(u32, @bitCast(RenderFlags{ .include_sourcepos = true })) == cmark.CMARK_OPT_SOURCEPOS);
|
||||||
std.debug.assert(@as(u32, @bitCast(RenderOptions{ .softbreaks_as_hardbreaks = true })) == cmark.CMARK_OPT_HARDBREAKS);
|
std.debug.assert(@as(u32, @bitCast(RenderFlags{ .softbreaks_as_hardbreaks = true })) == cmark.CMARK_OPT_HARDBREAKS);
|
||||||
std.debug.assert(@as(u32, @bitCast(RenderOptions{ .softbreaks_as_spaces = true })) == cmark.CMARK_OPT_NOBREAKS);
|
std.debug.assert(@as(u32, @bitCast(RenderFlags{ .softbreaks_as_spaces = true })) == cmark.CMARK_OPT_NOBREAKS);
|
||||||
std.debug.assert(@as(u32, @bitCast(RenderOptions{ .allow_unsafe_html = true })) == cmark.CMARK_OPT_UNSAFE);
|
std.debug.assert(@as(u32, @bitCast(RenderFlags{ .allow_unsafe_html = true })) == cmark.CMARK_OPT_UNSAFE);
|
||||||
std.debug.assert(@as(u32, @bitCast(ParseOptions{ .validate_utf8 = true })) == cmark.CMARK_OPT_VALIDATE_UTF8);
|
std.debug.assert(@as(u32, @bitCast(ParseOptions{ .validate_utf8 = true })) == cmark.CMARK_OPT_VALIDATE_UTF8);
|
||||||
std.debug.assert(@as(u32, @bitCast(ParseOptions{ .smart_quotes_and_dashes = true })) == cmark.CMARK_OPT_SMART);
|
std.debug.assert(@as(u32, @bitCast(ParseOptions{ .smart_quotes_and_dashes = true })) == cmark.CMARK_OPT_SMART);
|
||||||
}
|
}
|
||||||
@ -145,7 +152,7 @@ fn cmarkFree(ctx: ?*anyopaque, mem: ?*anyopaque) callconv(.C) void {
|
|||||||
allocator.free(raw_mem);
|
allocator.free(raw_mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem {
|
pub fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem {
|
||||||
return .{
|
return .{
|
||||||
.ctx = @constCast(allocator),
|
.ctx = @constCast(allocator),
|
||||||
.calloc = cmarkCalloc,
|
.calloc = cmarkCalloc,
|
||||||
@ -224,72 +231,21 @@ const CmarkNode = union(enum) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn render_html(self: CmarkNode, options: RenderOptions) ![:0]const u8 {
|
pub fn render(self: CmarkNode, format: RenderFormat, options: RenderOptions) ![:0]const u8 {
|
||||||
switch (self) {
|
const unwrapped: *cmark.cmark_node = switch (self) {
|
||||||
inline else => |node| {
|
inline else => |node| @ptrCast(node),
|
||||||
const result: [*:0]const u8 = cmark.cmark_render_html(
|
};
|
||||||
@ptrCast(node),
|
const flags: c_int = @bitCast(options.flags);
|
||||||
@bitCast(options),
|
|
||||||
) orelse return error.Failed;
|
|
||||||
|
|
||||||
return std.mem.sliceTo(result, 0);
|
const result: [*:0]const u8 = switch (format) {
|
||||||
},
|
.xml => cmark.cmark_render_xml(unwrapped, flags) orelse return error.Failed,
|
||||||
}
|
.html => cmark.cmark_render_html(unwrapped, flags) orelse return error.Failed,
|
||||||
}
|
.man => cmark.cmark_render_man(unwrapped, flags, options.width) orelse return error.Failed,
|
||||||
|
.commonmark => cmark.cmark_render_commonmark(unwrapped, flags, options.width) orelse return error.Failed,
|
||||||
|
.latex => cmark.cmark_render_latex(unwrapped, flags, options.width) orelse return error.Failed,
|
||||||
|
};
|
||||||
|
|
||||||
pub fn render_xml(self: CmarkNode, options: RenderOptions) ![:0]const u8 {
|
return std.mem.sliceTo(result, 0);
|
||||||
switch (self) {
|
|
||||||
inline else => |node| {
|
|
||||||
const result: [*:0]const u8 = cmark.cmark_render_xml(
|
|
||||||
@ptrCast(node),
|
|
||||||
@bitCast(options),
|
|
||||||
) orelse return error.Failed;
|
|
||||||
|
|
||||||
return std.mem.sliceTo(result, 0);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn render_man(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 {
|
|
||||||
switch (self) {
|
|
||||||
inline else => |node| {
|
|
||||||
const result: [*:0]const u8 = cmark.cmark_render_man(
|
|
||||||
@ptrCast(node),
|
|
||||||
@bitCast(options),
|
|
||||||
width,
|
|
||||||
) orelse return error.Failed;
|
|
||||||
|
|
||||||
return std.mem.sliceTo(result, 0);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn render_commonmark(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 {
|
|
||||||
switch (self) {
|
|
||||||
inline else => |node| {
|
|
||||||
const result: [*:0]const u8 = cmark.cmark_render_commonmark(
|
|
||||||
@ptrCast(node),
|
|
||||||
@bitCast(options),
|
|
||||||
width,
|
|
||||||
) orelse return error.Failed;
|
|
||||||
|
|
||||||
return std.mem.sliceTo(result, 0);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn render_latex(self: CmarkNode, width: u32, options: RenderOptions) ![:0]const u8 {
|
|
||||||
switch (self) {
|
|
||||||
inline else => |node| {
|
|
||||||
const result: [*:0]const u8 = cmark.cmark_render_latex(
|
|
||||||
@ptrCast(node),
|
|
||||||
@bitCast(options),
|
|
||||||
width,
|
|
||||||
) orelse return error.Failed;
|
|
||||||
|
|
||||||
return std.mem.sliceTo(result, 0);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn unlink(self: CmarkNode) void {
|
pub fn unlink(self: CmarkNode) void {
|
||||||
@ -366,6 +322,7 @@ const CmarkNode = union(enum) {
|
|||||||
done = cmark.CMARK_EVENT_DONE,
|
done = cmark.CMARK_EVENT_DONE,
|
||||||
enter = cmark.CMARK_EVENT_ENTER,
|
enter = cmark.CMARK_EVENT_ENTER,
|
||||||
exit = cmark.CMARK_EVENT_EXIT,
|
exit = cmark.CMARK_EVENT_EXIT,
|
||||||
|
visit, // a new event we introduce for nodes that will never have `exit` called on them to simplify consumer logic
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const NodeVisit = struct {
|
pub const NodeVisit = struct {
|
||||||
@ -381,13 +338,23 @@ const CmarkNode = union(enum) {
|
|||||||
const event: Event = @enumFromInt(cmark.cmark_iter_next(@ptrCast(self)));
|
const event: Event = @enumFromInt(cmark.cmark_iter_next(@ptrCast(self)));
|
||||||
switch (event) {
|
switch (event) {
|
||||||
.done => return null,
|
.done => return null,
|
||||||
.none => @panic("whoah nelly"),
|
.enter, .exit => |evt| {
|
||||||
else => |entex| {
|
const node = CmarkNode.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable;
|
||||||
return .{
|
const entex: Event = switch (node) {
|
||||||
.event = entex,
|
.html_block,
|
||||||
.node = CmarkNode.fromCNode(cmark.cmark_iter_get_node(@ptrCast(self))) catch unreachable,
|
.thematic_break,
|
||||||
|
.code_block,
|
||||||
|
.text,
|
||||||
|
.softbreak,
|
||||||
|
.linebreak,
|
||||||
|
.code,
|
||||||
|
.html_inline,
|
||||||
|
=> .visit,
|
||||||
|
else => evt,
|
||||||
};
|
};
|
||||||
|
return .{ .event = entex, .node = node };
|
||||||
},
|
},
|
||||||
|
.none, .visit => unreachable,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -534,6 +501,13 @@ pub const Parser = struct {
|
|||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn initWithWrappedAllocator(mem: *cmark.cmark_mem, options: ParseOptions) !Parser {
|
||||||
|
return cmark.cmark_parser_new_with_mem(
|
||||||
|
@bitCast(options),
|
||||||
|
mem,
|
||||||
|
) orelse error.OutOfMemory;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn feed(self: Parser, buffer: []const u8) void {
|
pub fn feed(self: Parser, buffer: []const u8) void {
|
||||||
cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len);
|
cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len);
|
||||||
}
|
}
|
||||||
@ -546,28 +520,56 @@ pub const Parser = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: Parser) void {
|
pub fn deinit(self: Parser) void {
|
||||||
cmark.cmark_parser_free(self._parser);
|
self.deinitParser();
|
||||||
self.allocator.destroy(self._cmark_mem);
|
self.allocator.destroy(self._cmark_mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn deinitParser(self: Parser) void {
|
||||||
|
cmark.cmark_parser_free(self._parser);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// the nodes hang on to a reference to the allocator, which does not play nicely at all
|
// the nodes hang on to a reference to the allocator, which does not play nicely at all
|
||||||
// with our allocator wrapping strategy. Basically, the parser has to live through
|
// with our allocator wrapping strategy. Basically, the parser has to live through
|
||||||
// node rendering. Due to this, it probably makes sense to keep a hard association
|
// node rendering. Due to this, it probably makes sense to keep a hard association
|
||||||
// between the parser and the node tree.
|
// between the parser and the node tree.
|
||||||
pub fn parse(allocator: std.mem.Allocator, buffer: []const u8, options: ParseOptions) !CmarkNode {
|
pub fn parse(allocator: *std.mem.Allocator, buffer: []const u8, options: ParseOptions) !CmarkNode {
|
||||||
const parser = try Parser.init(&allocator, options);
|
const parser = try Parser.init(allocator, options);
|
||||||
defer parser.deinit();
|
defer parser.deinitParser();
|
||||||
|
|
||||||
parser.feed(buffer);
|
parser.feed(buffer);
|
||||||
|
|
||||||
return try parser.finish;
|
return try parser.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn parseFile(allocator: std.mem.Allocator, path: []const u8, options: ParseOptions) !CmarkNode
|
// pub fn parseFile(allocator: std.mem.Allocator, path: []const u8, options: ParseOptions) !CmarkNode
|
||||||
|
|
||||||
pub fn main() void {
|
pub fn main() !void {
|
||||||
const a = std.heap.page_allocator;
|
const a = std.heap.page_allocator;
|
||||||
const parser = Parser.init(&a, .{}) catch @panic("noop");
|
const parser = try Parser.init(&a, .{});
|
||||||
defer parser.deinit();
|
defer parser.deinit();
|
||||||
|
|
||||||
|
parser.feed(
|
||||||
|
\\###### Test
|
||||||
|
\\
|
||||||
|
\\This is a test of *commonmark* **parsing**
|
||||||
|
\\
|
||||||
|
\\---
|
||||||
|
\\
|
||||||
|
\\ * `good`
|
||||||
|
\\ * [bye](bye)
|
||||||
|
\\
|
||||||
|
);
|
||||||
|
|
||||||
|
const node = try parser.finish();
|
||||||
|
defer node.deinit();
|
||||||
|
|
||||||
|
const iterator = try node.iterator();
|
||||||
|
defer iterator.deinit();
|
||||||
|
|
||||||
|
while (iterator.next()) |visit| {
|
||||||
|
std.debug.print("{s} {s}\n", .{ @tagName(visit.event), @tagName(visit.node) });
|
||||||
|
}
|
||||||
|
|
||||||
|
std.debug.print("{s}\n", .{try node.render(.html, .{})});
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user