start doodling some wrapper ideas

The main points of interest here is the allocator wrapper, which was
kind of annoying to implement. Since the Zig allocator interface
expects to know the size of all allocations, we have to secretly
allocate space for that in front of the pointer that we return. This
ends up causing a little bit of slightly nasty pointer manipulation. I
took longer to debug this than I would like to admit from a bug that
ended up being caused by storing a pointer to a stack copy of an
Allocator object. Whoops.

The other interesting thing I think (though much simpler) is
representing the options bitfield as a packed struct. I think this
makes initializing the parser options a lot nicer than the c variant.
This commit is contained in:
torque 2023-09-04 15:22:02 -07:00
parent 20b3ef3515
commit 2d099d931e
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk

View File

@ -0,0 +1,352 @@
const std = @import("std");
pub const cmark = @cImport({
@cInclude("cmark.h");
});
const Failed = error.Failed;
pub const NodeType = enum(c_int) {
none = cmark.CMARK_NODE_NONE,
// Block
document = cmark.CMARK_NODE_DOCUMENT,
block_quote = cmark.CMARK_NODE_BLOCK_QUOTE,
list = cmark.CMARK_NODE_LIST,
item = cmark.CMARK_NODE_ITEM,
code_block = cmark.CMARK_NODE_CODE_BLOCK,
html_block = cmark.CMARK_NODE_HTML_BLOCK,
custom_block = cmark.CMARK_NODE_CUSTOM_BLOCK,
paragraph = cmark.CMARK_NODE_PARAGRAPH,
heading = cmark.CMARK_NODE_HEADING,
thematic_break = cmark.CMARK_NODE_THEMATIC_BREAK,
// Inline
text = cmark.CMARK_NODE_TEXT,
softbreak = cmark.CMARK_NODE_SOFTBREAK,
linebreak = cmark.CMARK_NODE_LINEBREAK,
code = cmark.CMARK_NODE_CODE,
html_inline = cmark.CMARK_NODE_HTML_INLINE,
custom_inline = cmark.CMARK_NODE_CUSTOM_INLINE,
emph = cmark.CMARK_NODE_EMPH,
strong = cmark.CMARK_NODE_STRONG,
link = cmark.CMARK_NODE_LINK,
image = cmark.CMARK_NODE_IMAGE,
};
pub const ListType = enum(c_int) {
no_list = cmark.CMARK_NO_LIST,
bullet_list = cmark.CMARK_BULLET_LIST,
ordered_list = cmark.CMARK_ORDERED_LIST,
};
// only for ordered lists
pub const DelimType = enum(c_int) {
no_delim = cmark.CMARK_NO_DELIM,
period_delim = cmark.CMARK_PERIOD_DELIM,
paren_delim = cmark.CMARK_PAREN_DELIM,
};
pub const NodeIteratorEvent = enum(c_int) {
none = cmark.CMARK_EVENT_NONE,
done = cmark.CMARK_EVENT_DONE,
enter = cmark.CMARK_EVENT_ENTER,
exit = cmark.CMARK_EVENT_EXIT,
};
pub const CmarkOptions = packed struct(u32) {
_skip_0: bool = false, // for some reason 1 << 0 is skipped (oversight?)
include_sourcepos: bool = false, // index 1
softbreaks_as_hardbreaks: bool = false, // index 2
_skip_safe: bool = false, // index 3; deprecated, no effect
softbreaks_as_spaces: bool = false, // index 4
_skip_1: u3 = 0, // skip indices 5, 6 and 7
_skip_normalize: bool = false, // index 8; deprecated, no effect,
validate_utf8: bool = false, // index 9
smart_quotes_and_dashes: bool = false, // index 10
_skip_2: u6 = 0, // skip indices 11, 12, 13, 14, 15, 16
allow_unsafe_html: bool = false, // index 17
_padding: u14 = 0, // no other options
};
comptime {
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .include_sourcepos = true })) == cmark.CMARK_OPT_SOURCEPOS);
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .softbreaks_as_hardbreaks = true })) == cmark.CMARK_OPT_HARDBREAKS);
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .softbreaks_as_spaces = true })) == cmark.CMARK_OPT_NOBREAKS);
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .validate_utf8 = true })) == cmark.CMARK_OPT_VALIDATE_UTF8);
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .smart_quotes_and_dashes = true })) == cmark.CMARK_OPT_SMART);
std.debug.assert(@as(u32, @bitCast(CmarkOptions{ .allow_unsafe_html = true })) == cmark.CMARK_OPT_UNSAFE);
}
const AllocHeader = extern struct {
size: usize,
tip: u8,
inline fn fromAllocatedSlice(slice: []u8) *AllocHeader {
return @ptrFromInt(@intFromPtr(slice.ptr));
}
inline fn fromTipPointer(tip: *u8) *AllocHeader {
return @fieldParentPtr(AllocHeader, "tip", tip);
}
inline fn fullAllocFromTip(tip: *anyopaque) align(@alignOf(AllocHeader)) []u8 {
const hdr = fromTipPointer(@ptrCast(tip));
const mem: [*]u8 = @ptrFromInt(@intFromPtr(hdr));
return mem[0..(@sizeOf(usize) + hdr.size)];
}
};
fn cmarkCalloc(ctx: ?*anyopaque, size: usize, count: usize) callconv(.C) ?*anyopaque {
const allocator: *const std.mem.Allocator = @ptrCast(@alignCast(ctx orelse return null));
const mem_size = size * count;
const raw_mem = allocator.alignedAlloc(
u8,
@alignOf(AllocHeader),
@sizeOf(usize) + mem_size,
) catch return null;
// cmark does rely on the allocated memory being zeroed, unfortunately. The Zig
// allocator interface always fills with undefined, and this is not configurable.
@memset(raw_mem, 0);
const header = AllocHeader.fromAllocatedSlice(raw_mem);
header.size = mem_size;
return &header.tip;
}
fn cmarkRealloc(ctx: ?*anyopaque, mem: ?*anyopaque, new_size: usize) callconv(.C) ?*anyopaque {
const allocator: *const std.mem.Allocator = @ptrCast(@alignCast(ctx orelse return null));
if (mem == null) {
return cmarkCalloc(ctx, 1, new_size);
}
const raw_mem = allocator.realloc(AllocHeader.fullAllocFromTip(mem.?), new_size) catch
return null;
const header = AllocHeader.fromAllocatedSlice(raw_mem);
header.size = raw_mem.len;
return &header.tip;
}
fn cmarkFree(ctx: ?*anyopaque, mem: ?*anyopaque) callconv(.C) void {
const allocator: *const std.mem.Allocator = @ptrCast(@alignCast(ctx orelse return));
if (mem == null) return;
const raw_mem = AllocHeader.fullAllocFromTip(mem.?);
allocator.free(raw_mem);
}
fn wrapCmarkAllocator(allocator: *const std.mem.Allocator) cmark.cmark_mem {
return .{
.ctx = @constCast(allocator),
.calloc = cmarkCalloc,
.realloc = cmarkRealloc,
.free = cmarkFree,
};
}
const CmarkNode = union(enum) {
document: *CmarkOpaqueNode,
heading: *CmarkHeadingNode,
block_quote: *CmarkOpaqueNode,
bullet_list: *CmarkOpaqueNode,
ordered_list: *CmarkOrderedListNode,
item: *CmarkOpaqueNode,
code_block: *CmarkOpaqueNode,
html_block: *CmarkOpaqueNode,
custom_block: *CmarkOpaqueNode,
thematic_break: *CmarkOpaqueNode,
paragraph: *CmarkOpaqueNode,
text: *CmarkOpaqueNode,
softbreak: *CmarkOpaqueNode,
linebreak: *CmarkOpaqueNode,
html_inline: *CmarkOpaqueNode,
custom_inline: *CmarkOpaqueNode,
code: *CmarkOpaqueNode,
emph: *CmarkOpaqueNode,
strong: *CmarkOpaqueNode,
link: *CmarkOpaqueNode,
image: *CmarkOpaqueNode,
fn fromCNode(c_node: ?*cmark.cmark_node) !CmarkNode {
const node = c_node orelse return error.Failed;
switch (@as(NodeType, @enumFromInt(cmark.cmark_node_get_type(@ptrCast(node))))) {
.none => @panic("none? none??????"),
.heading => return .{ .heading = @ptrCast(node) },
.document => return .{ .document = @ptrCast(node) },
.block_quote => return .{ .block_quote = @ptrCast(node) },
.list => {
if (cmark.cmark_node_get_list_type(node) == cmark.CMARK_BULLET_LIST)
return .{ .bullet_list = @ptrCast(node) }
else
return .{ .ordered_list = @ptrCast(node) };
},
.item => return .{ .item = @ptrCast(node) },
.code_block => return .{ .code_block = @ptrCast(node) },
.html_block => return .{ .html_block = @ptrCast(node) },
.custom_block => return .{ .custom_block = @ptrCast(node) },
.paragraph => return .{ .paragraph = @ptrCast(node) },
.thematic_break => return .{ .thematic_break = @ptrCast(node) },
.text => return .{ .text = @ptrCast(node) },
.softbreak => return .{ .softbreak = @ptrCast(node) },
.linebreak => return .{ .linebreak = @ptrCast(node) },
.code => return .{ .code = @ptrCast(node) },
.html_inline => return .{ .html_inline = @ptrCast(node) },
.custom_inline => return .{ .custom_inline = @ptrCast(node) },
.emph => return .{ .emph = @ptrCast(node) },
.strong => return .{ .strong = @ptrCast(node) },
.link => return .{ .link = @ptrCast(node) },
.image => return .{ .image = @ptrCast(node) },
}
}
};
pub fn CmarkNodeCommon(comptime Self: type) type {
return struct {
pub fn getUserData(self: *Self) ?*anyopaque {
return @ptrCast(cmark.cmark_node_get_user_data(@ptrCast(self)));
}
pub fn setUserData(self: *Self, user_data: ?*anyopaque) bool {
return cmark.cmark_node_set_user_data(@ptrCast(self), user_data) == 1;
}
};
}
pub fn CmarkBlockNodeContents(comptime Self: type) type {
return struct {
pub fn getContent(self: *Self) [:0]const u8 {
return cmark.cmark_node_get_literal(@ptrCast(self));
}
pub fn setContent(self: *Self, new: [:0]const u8) bool {
return cmark.cmark_node_set_literal(@ptrCast(self), new.ptr);
}
};
}
pub const CmarkOpaqueNode = opaque {
pub usingnamespace CmarkNodeCommon(@This());
};
pub const CmarkHeadingNode = opaque {
pub fn getLevel(self: *CmarkHeadingNode) i3 {
return @intCast(cmark.cmark_node_get_heading_level(@ptrCast(self)));
}
pub fn setLevel(self: *CmarkHeadingNode, level: i3) !void {
if (cmark.cmark_node_set_heading_level(@ptrCast(self), level) != 1)
return error.Failed;
}
pub usingnamespace CmarkNodeCommon(@This());
};
pub const CmarkOrderedListNode = opaque {
pub fn getDelimeter(self: *CmarkOrderedListNode) DelimType {
return @enumFromInt(cmark.cmark_node_get_list_delim(@ptrCast(self)));
}
pub fn setDelimiter(self: *CmarkOrderedListNode, new: DelimType) !void {
if (cmark.cmark_node_set_list_delim(@ptrCast(self), @intFromEnum(new)) != 1)
return error.Failed;
}
pub fn getStart(self: *CmarkOrderedListNode) i32 {
return @intCast(cmark.cmark_node_get_list_start(@ptrCast(self)));
}
pub fn setStart(self: *CmarkOrderedListNode, start: i32) !void {
if (cmark.cmark_node_get_list_start(@ptrCast(self), @intCast(start)) != 1)
return error.Failed;
}
pub fn getTight(self: *CmarkOrderedListNode) bool {
return cmark.cmark_node_get_list_start(@ptrCast(self)) == 1;
}
pub fn setTight(self: *CmarkOrderedListNode, tight: bool) !void {
if (cmark.cmark_node_get_list_start(@ptrCast(self), @intFromBool(tight)) != 1)
return error.Failed;
}
pub usingnamespace CmarkNodeCommon(@This());
};
pub const CmarkCodeBlockNode = opaque {
pub fn getFenceInfo(self: *CmarkHeadingNode) [:0]const u8 {
const str: [*:0]const u8 = cmark.cmark_node_get_fence_info(@ptrCast(self)) orelse
return error.Failed;
return std.mem.sliceTo(str, 0);
}
pub fn setFenceInfo(self: *CmarkHeadingNode, info: [:0]const u8) !void {
if (cmark.cmark_node_set_fence_info(@ptrCast(self), info.ptr) != 1)
return error.Failed;
}
pub usingnamespace CmarkNodeCommon(@This());
};
pub const Parser = struct {
allocator: *const std.mem.Allocator,
_cmark_mem: *cmark.cmark_mem,
_parser: *cmark.cmark_parser,
pub fn new(allocator: *const std.mem.Allocator, options: CmarkOptions) !Parser {
var self: Parser = .{
.allocator = allocator,
._cmark_mem = undefined,
._parser = undefined,
};
self._cmark_mem = try allocator.create(cmark.cmark_mem);
self._cmark_mem.* = wrapCmarkAllocator(self.allocator);
self._parser = cmark.cmark_parser_new_with_mem(
@bitCast(options),
self._cmark_mem,
) orelse return error.OutOfMemory;
return self;
}
pub fn feed(self: Parser, buffer: []const u8) void {
cmark.cmark_parser_feed(self._parser, buffer.ptr, buffer.len);
}
pub fn finish(self: Parser) CmarkNode {
return CmarkNode.fromCNode(cmark.cmark_parser_finish(self._parser));
}
pub fn deinit(self: Parser) void {
cmark.cmark_parser_free(self._parser);
self.allocator.destroy(self._cmark_mem);
}
};
// pub fn parse(buffer: []const u8, options: CmarkOptions) !CmarkNode
// pub fn parseFile(path: []const u8, options: CmarkOptions) !CmarkNode
pub fn main() void {
const a = std.heap.page_allocator;
const parser = Parser.new(&a, .{}) catch @panic("noop");
defer parser.deinit();
}