diff --git a/build.zig b/build.zig index 94c5dff..2197715 100644 --- a/build.zig +++ b/build.zig @@ -9,7 +9,7 @@ pub fn build(b: *std.Build) void { const optimize = b.standardOptimizeOption(.{}); const yaml_zig = b.addModule("libyaml", .{ - .source_file = .{ .path = "src/libyaml.zig" }, + .source_file = .{ .path = "src/yaml.zig" }, }); // yaml_zig.addIncludePath(.{ .path = b.getInstallPath(.header, "") }); // _ = yaml_zig; diff --git a/src/libyaml.zig b/src/libyaml.zig index 0814a4c..64a313e 100644 --- a/src/libyaml.zig +++ b/src/libyaml.zig @@ -1,567 +1,388 @@ -const std = @import("std"); +pub const Encoding = enum(c_int) { + any, + utf8, + utf16le, + utf16be, +}; -pub const Scalar = []const u8; -pub const List = []Value; -pub const Map = std.StringArrayHashMapUnmanaged(Value); +pub const VersionDirective = extern struct { + major: c_int, + minor: c_int, +}; -pub fn Owned(comptime T: type) type { - return struct { - root: T, - allocator: *std.heap.ArenaAllocator, +pub const TagDirective = extern struct { + handle: ?[*:0]u8, + prefix: ?[*:0]u8, +}; - pub fn deinit(self: @This()) void { - const child = self.allocator.child_allocator; - self.allocator.deinit(); - child.destroy(self.allocator); - } - }; -} +pub const LineBreak = enum(c_int) { + any, + cr, + lf, + crlf, +}; -pub const Value = union(enum) { - scalar: Scalar, - list: List, - map: Map, +pub const ErrorType = enum(c_int) { + okay, + alloc_error, + read_error, + scanner_error, + parser_error, + composer_error, + writer_error, + emitter_error, +}; - pub fn fromString(allocator: std.mem.Allocator, data: []const u8) !Owned(Value) { - var parser = try libyaml.Parser.init(); - defer parser.deinit(); +pub const Mark = extern struct { + index: usize, + line: usize, + column: usize, +}; - parser.setInputString(data); +pub const ScalarStyle = enum(c_int) { + any, + plain, + single_quoted, + double_quoted, + literal, + folded, +}; - var builder = try Builder.init(allocator); - errdefer builder.deinit(); +pub const 
SequenceStyle = enum(c_int) { + any, + block, + flow, +}; - var docseen = false; - while (true) { - var event: libyaml.Event = undefined; - parser.parse(&event) catch { - std.debug.print( - "parser failed: {s}, {s}, line {d}, col: {d}\n", - .{ @tagName(parser.@"error"), parser.problem.?, parser.problem_mark.line, parser.problem_mark.column }, - ); - return error.Failed; - }; - defer event.deinit(); +pub const MappingStyle = enum(c_int) { + any, + block, + flow, +}; - std.debug.print("event: {s}\n", .{@tagName(event.type)}); +pub const TokenType = enum(c_int) { + none, + stream_start, + stream_end, + version_directive, + tag_directive, + document_start, + document_end, + block_sequence_start, + block_mapping_start, + block_end, + flow_sequence_start, + flow_sequence_end, + flow_mapping_start, + flow_mapping_end, + block_entry, + flow_entry, + key, + value, + alias, + anchor, + tag, + scalar, +}; - switch (event.type) { - .empty => return error.Failed, - .stream_start => {}, - .stream_end => break, - .document_start => {}, - .document_end => docseen = if (docseen) return error.Failed else true, - .alias => return error.Failed, - .scalar => try builder.pushScalar(event.data.scalar.value[0..event.data.scalar.length]), - .sequence_start => try builder.startList(), - .sequence_end => try builder.endList(), - .mapping_start => try builder.startMap(), - .mapping_end => try builder.endMap(), - } - } +pub const Token = extern struct { + type: TokenType, + data: extern union { + stream_start: extern struct { + encoding: Encoding, + }, + alias: extern struct { + value: ?[*:0]u8, + }, + anchor: extern struct { + value: ?[*:0]u8, + }, + tag: extern struct { + handle: ?[*:0]u8, + suffix: ?[*:0]u8, + }, + scalar: extern struct { + value: [*]u8, + length: usize, + style: ScalarStyle, + }, + version_directive: VersionDirective, + tag_directive: TagDirective, + }, + start_mark: Mark, + end_mark: Mark, +}; - return builder.disown(); +pub const EventType = enum(c_int) { + empty, + 
stream_start, + stream_end, + document_start, + document_end, + alias, + scalar, + sequence_start, + sequence_end, + mapping_start, + mapping_end, +}; + +pub const Event = extern struct { + type: EventType, + data: extern union { + stream_start: extern struct { + encoding: Encoding, + }, + document_start: extern struct { + version_directive: ?*VersionDirective, + tag_directives: extern struct { + start: ?*TagDirective, + end: ?*TagDirective, + }, + implicit: c_int, + }, + document_end: extern struct { implicit: c_int }, + alias: extern struct { anchor: [*:0]u8 }, + scalar: extern struct { + anchor: ?[*:0]u8, + tag: ?[*:0]u8, + value: [*]u8, + length: usize, + plain_implicit: c_int, + quoted_implicit: c_int, + style: ScalarStyle, + }, + sequence_start: extern struct { + anchor: ?[*:0]u8, + tag: ?[*:0]u8, + implicit: c_int, + style: SequenceStyle, + }, + mapping_start: extern struct { + anchor: ?[*:0]u8, + tag: ?[*:0]u8, + implicit: c_int, + style: MappingStyle, + }, + }, + start_mark: Mark, + end_mark: Mark, + + pub fn deinit(self: *Event) void { + yaml_event_delete(self); } - pub const Builder = struct { - pub const Stack = union(enum) { - root, - list: std.ArrayListUnmanaged(Value), - map: struct { - lastkey: ?Scalar = null, - map: Map, - }, - }; - - allocator: std.mem.Allocator, - container_stack: std.ArrayListUnmanaged(Stack), - root: Value, - - pub fn init(child_allocator: std.mem.Allocator) std.mem.Allocator.Error!Builder { - const arena = try child_allocator.create(std.heap.ArenaAllocator); - arena.* = std.heap.ArenaAllocator.init(child_allocator); - const allocator = arena.allocator(); - - var stack = try std.ArrayListUnmanaged(Stack).initCapacity(allocator, 1); - stack.appendAssumeCapacity(.root); - - return .{ - .allocator = allocator, - .container_stack = stack, - .root = .{ .scalar = "" }, - }; - } - - // this should only be run on failure. 
- pub fn deinit(self: Builder) void { - const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(self.allocator.ptr)); - const alloc = arena.child_allocator; - arena.deinit(); - alloc.destroy(arena); - } - - pub fn disown(self: *Builder) Owned(Value) { - return .{ - .root = self.root, - .allocator = @ptrCast(@alignCast(self.allocator.ptr)), - }; - } - - fn pushScalar(self: *Builder, value: Scalar) !void { - switch (self.container_stack.items[self.container_stack.items.len - 1]) { - .root => { - self.root = .{ .scalar = try self.allocator.dupe(u8, value) }; - }, - .list => |*builder| try builder.append( - self.allocator, - .{ .scalar = try self.allocator.dupe(u8, value) }, - ), - .map => |*builder| { - if (builder.lastkey) |key| { - try builder.map.put(self.allocator, key, .{ .scalar = try self.allocator.dupe(u8, value) }); - builder.lastkey = null; - } else { - const duped = try self.allocator.dupe(u8, value); - try builder.map.put(self.allocator, duped, undefined); - builder.lastkey = duped; - } - }, - } - } - - fn startList(self: *Builder) !void { - try self.container_stack.append(self.allocator, .{ .list = .{} }); - } - - fn endList(self: *Builder) !void { - var top = self.container_stack.pop(); - - switch (self.container_stack.items[self.container_stack.items.len - 1]) { - .root => self.root = .{ .list = try top.list.toOwnedSlice(self.allocator) }, - .list => |*builder| try builder.append( - self.allocator, - .{ .list = try top.list.toOwnedSlice(self.allocator) }, - ), - .map => |*builder| { - if (builder.lastkey) |key| { - try builder.map.put(self.allocator, key, .{ .list = try top.list.toOwnedSlice(self.allocator) }); - builder.lastkey = null; - } else return error.Failed; - }, - } - } - - fn startMap(self: *Builder) !void { - try self.container_stack.append(self.allocator, .{ .map = .{ .map = .{} } }); - } - - fn endMap(self: *Builder) !void { - var top = self.container_stack.pop(); - - switch (self.container_stack.items[self.container_stack.items.len - 
1]) { - .root => self.root = .{ .map = top.map.map }, - .list => |*builder| try builder.append( - self.allocator, - .{ .map = top.map.map }, - ), - .map => |*builder| { - if (builder.lastkey) |key| { - try builder.map.put(self.allocator, key, .{ .map = top.map.map }); - builder.lastkey = null; - } else return error.Failed; - }, - } - } - }; + pub extern fn yaml_event_delete(event: *Event) void; }; -pub const libyaml = struct { - pub const Encoding = enum(c_int) { - any, - utf8, - utf16le, - utf16be, - }; - - pub const VersionDirective = extern struct { - major: c_int, - minor: c_int, - }; - - pub const TagDirective = extern struct { - handle: ?[*:0]u8, - prefix: ?[*:0]u8, - }; - - pub const LineBreak = enum(c_int) { - any, - cr, - lf, - crlf, - }; - - pub const ErrorType = enum(c_int) { - okay, - alloc_error, - read_error, - scanner_error, - parser_error, - composer_error, - writer_error, - emitter_error, - }; - - pub const Mark = extern struct { - index: usize, - line: usize, - column: usize, - }; - - pub const ScalarStyle = enum(c_int) { - any, - plain, - single_quoted, - double_quoted, - literal, - folded, - }; - - pub const SequenceStyle = enum(c_int) { - any, - block, - flow, - }; - - pub const MappingStyle = enum(c_int) { - any, - block, - flow, - }; - - pub const TokenType = enum(c_int) { - none, - stream_start, - stream_end, - version_directive, - tag_directive, - document_start, - document_end, - block_sequence_start, - block_mapping_start, - block_end, - flow_sequence_start, - flow_sequence_end, - flow_mapping_start, - flow_mapping_end, - block_entry, - flow_entry, - key, - value, - alias, - anchor, - tag, - scalar, - }; - - pub const Token = extern struct { - type: TokenType, - data: extern union { - stream_start: extern struct { - encoding: Encoding, - }, - alias: extern struct { - value: ?[*:0]u8, - }, - anchor: extern struct { - value: ?[*:0]u8, - }, - tag: extern struct { - handle: ?[*:0]u8, - suffix: ?[*:0]u8, - }, - scalar: extern struct { - value: 
[*]u8, - length: usize, - style: ScalarStyle, - }, - version_directive: VersionDirective, - tag_directive: TagDirective, - }, - start_mark: Mark, - end_mark: Mark, - }; - - pub const EventType = enum(c_int) { - empty, - stream_start, - stream_end, - document_start, - document_end, - alias, - scalar, - sequence_start, - sequence_end, - mapping_start, - mapping_end, - }; - - pub const Event = extern struct { - type: EventType, - data: extern union { - stream_start: extern struct { - encoding: Encoding, - }, - document_start: extern struct { - version_directive: ?*VersionDirective, - tag_directives: extern struct { - start: ?*TagDirective, - end: ?*TagDirective, - }, - implicit: c_int, - }, - document_end: extern struct { implicit: c_int }, - alias: extern struct { anchor: [*:0]u8 }, - scalar: extern struct { - anchor: ?[*:0]u8, - tag: ?[*:0]u8, - value: [*]u8, - length: usize, - plain_implicit: c_int, - quoted_implicit: c_int, - style: ScalarStyle, - }, - sequence_start: extern struct { - anchor: ?[*:0]u8, - tag: ?[*:0]u8, - implicit: c_int, - style: SequenceStyle, - }, - mapping_start: extern struct { - anchor: ?[*:0]u8, - tag: ?[*:0]u8, - implicit: c_int, - style: MappingStyle, - }, - }, - start_mark: Mark, - end_mark: Mark, - - pub fn deinit(self: *Event) void { - yaml_event_delete(self); - } - - pub extern fn yaml_event_delete(event: *Event) void; - }; - - pub const SimpleKey = extern struct { - possible: c_int, - required: c_int, - token_number: usize, - mark: Mark, - }; - - pub const NodeType = enum(c_int) { - none, - scalar, - sequence, - mapping, - }; - - pub const NodeItem = c_int; - - pub const NodePair = extern struct { - key: c_int, - value: c_int, - }; - - pub const Node = extern struct { - type: NodeType, - tag: ?[*:0]u8, - data: extern union { - scalar: extern struct { - value: ?[*:0]u8, - length: usize, - style: ScalarStyle, - }, - sequence: extern struct { - items: extern struct { - start: ?*NodeItem, - end: ?*NodeItem, - top: ?*NodeItem, - }, - 
style: SequenceStyle, - }, - mapping: extern struct { - pairs: extern struct { - start: ?*NodePair, - end: ?*NodePair, - top: ?*NodePair, - }, - style: MappingStyle, - }, - }, - start_mark: Mark, - end_mark: Mark, - }; - - pub const Document = extern struct { - nodes: extern struct { - start: ?*Node, - end: ?*Node, - top: ?*Node, - }, - version_directive: ?*VersionDirective, - tag_directives: extern struct { - start: ?*TagDirective, - end: ?*TagDirective, - }, - start_implicit: c_int, - end_implicit: c_int, - start_mark: Mark, - end_mark: Mark, - }; - - pub const AliasData = extern struct { - anchor: ?[*]u8, - index: c_int, - mark: Mark, - }; - - pub const ReadHandler = *const fn (ctx: ?*anyopaque, buffer: [*]u8, buffer_size: usize, bytes_read: *usize) callconv(.C) c_int; - - pub const ParserState = enum(c_int) { - stream_start, - implicit_document_start, - document_start, - document_content, - document_end, - block_node, - block_node_or_indentless_sequence, - flow_node, - block_sequence_first_entry, - block_sequence_entry, - indentless_sequence_entry, - block_mapping_first_key, - block_mapping_key, - block_mapping_value, - flow_sequence_first_entry, - flow_sequence_entry, - flow_sequence_entry_mapping_key, - flow_sequence_entry_mapping_value, - flow_sequence_entry_mapping_end, - flow_mapping_first_key, - flow_mapping_key, - flow_mapping_value, - flow_mapping_empty_value, - end, - }; - - pub const Parser = extern struct { - @"error": ErrorType, - problem: ?[*:0]const u8, - problem_offset: usize, - problem_value: c_int, - problem_mark: Mark, - context: ?[*:0]const u8, - context_mark: Mark, - read_handler: ?ReadHandler, - read_handler_data: ?*anyopaque, - input: extern union { - string: extern struct { - start: ?[*]const u8, - end: ?[*]const u8, - current: ?[*]const u8, - }, - file: ?*anyopaque, - }, - eof: c_int, - buffer: extern struct { - start: ?[*]u8, - end: ?[*]u8, - pointer: ?[*]u8, - last: ?[*]u8, - }, - unread: usize, - raw_buffer: extern struct { - start: 
?[*]u8, - end: ?[*]u8, - pointer: ?[*]u8, - last: ?[*]u8, - }, - encoding: Encoding, - offset: usize, - mark: Mark, - stream_start_produced: c_int, - stream_end_produced: c_int, - flow_level: c_int, - tokens: extern struct { - start: ?*Token, - end: ?*Token, - head: ?*Token, - tail: ?*Token, - }, - tokens_parsed: usize, - token_available: c_int, - indents: extern struct { - start: ?*c_int, - end: ?*c_int, - top: ?*c_int, - }, - indent: c_int, - simple_key_allowed: c_int, - simple_keys: extern struct { - start: ?*SimpleKey, - end: ?*SimpleKey, - top: ?*SimpleKey, - }, - states: extern struct { - start: ?*SimpleKey, - end: ?*SimpleKey, - top: ?*SimpleKey, - }, - state: ParserState, - marks: extern struct { - start: ?*Mark, - end: ?*Mark, - top: ?*Mark, - }, - tag_directives: extern struct { - start: ?*TagDirective, - end: ?*TagDirective, - top: ?*TagDirective, - }, - aliases: extern struct { - start: ?*AliasData, - end: ?*AliasData, - top: ?*AliasData, - }, - document: ?*Document, - - pub fn init() !Parser { - var parser: Parser = undefined; - if (yaml_parser_initialize(&parser) == 0) return error.Failed; - return parser; - } - - pub fn deinit(self: *Parser) void { - yaml_parser_delete(self); - } - - pub fn setInputString(self: *Parser, input: []const u8) void { - yaml_parser_set_input_string(self, input.ptr, input.len); - } - - pub fn parse(self: *Parser, event: *Event) !void { - if (yaml_parser_parse(self, event) == 0) return error.Failed; - } - - pub extern fn yaml_parser_initialize(parser: *Parser) c_int; - pub extern fn yaml_parser_delete(parser: *Parser) void; - pub extern fn yaml_parser_set_input_string(parser: *Parser, input: [*]const u8, size: usize) void; - pub extern fn yaml_parser_set_input(parser: *Parser, handler: ReadHandler, data: ?*anyopaque) void; - pub extern fn yaml_parser_set_encoding(parser: *Parser, encoding: Encoding) void; - pub extern fn yaml_parser_scan(parser: *Parser, token: *Token) c_int; - pub extern fn yaml_parser_parse(parser: 
*Parser, event: *Event) c_int; - pub extern fn yaml_parser_load(parser: *Parser, document: *Document) c_int; - }; +pub const SimpleKey = extern struct { + possible: c_int, + required: c_int, + token_number: usize, + mark: Mark, +}; + +pub const NodeType = enum(c_int) { + none, + scalar, + sequence, + mapping, +}; + +pub const NodeItem = c_int; + +pub const NodePair = extern struct { + key: c_int, + value: c_int, +}; + +pub const Node = extern struct { + type: NodeType, + tag: ?[*:0]u8, + data: extern union { + scalar: extern struct { + value: ?[*:0]u8, + length: usize, + style: ScalarStyle, + }, + sequence: extern struct { + items: extern struct { + start: ?*NodeItem, + end: ?*NodeItem, + top: ?*NodeItem, + }, + style: SequenceStyle, + }, + mapping: extern struct { + pairs: extern struct { + start: ?*NodePair, + end: ?*NodePair, + top: ?*NodePair, + }, + style: MappingStyle, + }, + }, + start_mark: Mark, + end_mark: Mark, +}; + +pub const Document = extern struct { + nodes: extern struct { + start: ?*Node, + end: ?*Node, + top: ?*Node, + }, + version_directive: ?*VersionDirective, + tag_directives: extern struct { + start: ?*TagDirective, + end: ?*TagDirective, + }, + start_implicit: c_int, + end_implicit: c_int, + start_mark: Mark, + end_mark: Mark, +}; + +pub const AliasData = extern struct { + anchor: ?[*]u8, + index: c_int, + mark: Mark, +}; + +pub const ReadHandler = *const fn (ctx: ?*anyopaque, buffer: [*]u8, buffer_size: usize, bytes_read: *usize) callconv(.C) c_int; + +pub const ParserState = enum(c_int) { + stream_start, + implicit_document_start, + document_start, + document_content, + document_end, + block_node, + block_node_or_indentless_sequence, + flow_node, + block_sequence_first_entry, + block_sequence_entry, + indentless_sequence_entry, + block_mapping_first_key, + block_mapping_key, + block_mapping_value, + flow_sequence_first_entry, + flow_sequence_entry, + flow_sequence_entry_mapping_key, + flow_sequence_entry_mapping_value, + 
flow_sequence_entry_mapping_end,
+    flow_mapping_first_key,
+    flow_mapping_key,
+    flow_mapping_value,
+    flow_mapping_empty_value,
+    end,
+};
+
+pub const Parser = extern struct { // handed as `*Parser` to the `yaml_parser_*` externs below, so field order/types must track libyaml's parser struct
+    @"error": ErrorType,
+    problem: ?[*:0]const u8,
+    problem_offset: usize,
+    problem_value: c_int,
+    problem_mark: Mark,
+    context: ?[*:0]const u8,
+    context_mark: Mark,
+    read_handler: ?ReadHandler,
+    read_handler_data: ?*anyopaque,
+    input: extern union {
+        string: extern struct {
+            start: ?[*]const u8,
+            end: ?[*]const u8,
+            current: ?[*]const u8,
+        },
+        file: ?*anyopaque,
+    },
+    eof: c_int,
+    buffer: extern struct {
+        start: ?[*]u8,
+        end: ?[*]u8,
+        pointer: ?[*]u8,
+        last: ?[*]u8,
+    },
+    unread: usize,
+    raw_buffer: extern struct {
+        start: ?[*]u8,
+        end: ?[*]u8,
+        pointer: ?[*]u8,
+        last: ?[*]u8,
+    },
+    encoding: Encoding,
+    offset: usize,
+    mark: Mark,
+    stream_start_produced: c_int,
+    stream_end_produced: c_int,
+    flow_level: c_int,
+    tokens: extern struct {
+        start: ?*Token,
+        end: ?*Token,
+        head: ?*Token,
+        tail: ?*Token,
+    },
+    tokens_parsed: usize,
+    token_available: c_int,
+    indents: extern struct {
+        start: ?*c_int,
+        end: ?*c_int,
+        top: ?*c_int,
+    },
+    indent: c_int,
+    simple_key_allowed: c_int,
+    simple_keys: extern struct {
+        start: ?*SimpleKey,
+        end: ?*SimpleKey,
+        top: ?*SimpleKey,
+    },
+    states: extern struct { // stack of parser states: elements are ParserState (previously mistyped as SimpleKey)
+        start: ?*ParserState,
+        end: ?*ParserState,
+        top: ?*ParserState,
+    },
+    state: ParserState,
+    marks: extern struct {
+        start: ?*Mark,
+        end: ?*Mark,
+        top: ?*Mark,
+    },
+    tag_directives: extern struct {
+        start: ?*TagDirective,
+        end: ?*TagDirective,
+        top: ?*TagDirective,
+    },
+    aliases: extern struct {
+        start: ?*AliasData,
+        end: ?*AliasData,
+        top: ?*AliasData,
+    },
+    document: ?*Document,
+
+    pub fn init() !Parser { // returns error.Failed when yaml_parser_initialize reports 0
+        var parser: Parser = undefined; // filled in by yaml_parser_initialize before it is returned
+        if (yaml_parser_initialize(&parser) == 0) return error.Failed;
+        return parser;
+    }
+
+    pub fn deinit(self: *Parser) void { // forwards to yaml_parser_delete
+        yaml_parser_delete(self);
+    }
+
+    pub fn setInputString(self: *Parser, input: []const u8) void { // passes ptr/len only; NOTE(review): presumably `input` must outlive parsing -- confirm
+        yaml_parser_set_input_string(self, input.ptr, input.len);
+    }
+
+    pub fn parse(self: *Parser, event: *Event) !void { // returns error.Failed when yaml_parser_parse reports 0
+        if (yaml_parser_parse(self, event) == 0) return error.Failed;
+    }
+
+    pub extern fn yaml_parser_initialize(parser: *Parser) c_int;
+    pub extern fn yaml_parser_delete(parser: *Parser) void;
+    pub extern fn yaml_parser_set_input_string(parser: *Parser, input: [*]const u8, size: usize) void;
+    pub extern fn yaml_parser_set_input(parser: *Parser, handler: ReadHandler, data: ?*anyopaque) void;
+    pub extern fn yaml_parser_set_encoding(parser: *Parser, encoding: Encoding) void;
+    pub extern fn yaml_parser_scan(parser: *Parser, token: *Token) c_int;
+    pub extern fn yaml_parser_parse(parser: *Parser, event: *Event) c_int;
+    pub extern fn yaml_parser_load(parser: *Parser, document: *Document) c_int;
 };
diff --git a/src/main.zig b/src/main.zig
index 82503f3..fb46313 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -14,7 +14,14 @@ pub fn main() !void {
     );
     defer allocator.free(slurp);
 
-    const doc = try yaml.Value.fromString(allocator, slurp);
+    var diag = yaml.ParseDiagnostic{ .message = "?????"
};
+    const doc = yaml.Document.fromString(allocator, slurp, &diag) catch |err| {
+        std.debug.print(
+            "Failed to parse line: {d}, col: {d}: {s}\n",
+            .{ diag.line, diag.col, diag.message },
+        );
+        return err;
+    };
     defer doc.deinit();
 
     std.debug.print("\n-----\n\n", .{});
diff --git a/src/yaml.zig b/src/yaml.zig
new file mode 100644
index 0000000..7957c45
--- /dev/null
+++ b/src/yaml.zig
@@ -0,0 +1,331 @@
+const std = @import("std");
+
+pub const libyaml = @import("./libyaml.zig");
+
+pub const Scalar = []const u8;
+pub const List = []Value;
+pub const Map = std.StringArrayHashMapUnmanaged(Value);
+
+/// Out-of-band description of a parse failure. `Document.fromString` fills this
+/// in before it returns `error.Failed`.
+pub const ParseDiagnostic = struct {
+    /// Human-readable description of the failure; never freed by this module.
+    message: []const u8,
+    /// 1-based line of the failure, or 0 if no position was recorded.
+    line: usize = 0,
+    /// 1-based column of the failure, or 0 if no position was recorded.
+    col: usize = 0,
+
+    /// Records both the position and the description of a failure.
+    pub fn set(self: *ParseDiagnostic, mark: libyaml.Mark, message: []const u8) void {
+        self.setMark(mark);
+        self.setMessage(message);
+    }
+
+    /// Records the failure position. libyaml reports zero-based positions, so
+    /// both coordinates are shifted to the 1-based form used in messages.
+    pub fn setMark(self: *ParseDiagnostic, mark: libyaml.Mark) void {
+        self.line = mark.line + 1;
+        self.col = mark.column + 1;
+    }
+
+    /// Records the failure description without touching the position.
+    pub fn setMessage(self: *ParseDiagnostic, message: []const u8) void {
+        self.message = message;
+    }
+};
+
+/// A parsed YAML document. Everything reachable from `root` lives in the arena
+/// referenced by `allocator`; call `deinit` to release it all at once.
+pub const Document = struct {
+    root: Value,
+    allocator: *std.heap.ArenaAllocator,
+
+    /// Parses `data` as a single YAML document.
+    ///
+    /// `allocator` backs the arena that owns the returned tree. On failure
+    /// `error.Failed` is returned and `diag` describes what went wrong; aliases,
+    /// non-scalar map keys and multi-document streams are rejected.
+    pub fn fromString(allocator: std.mem.Allocator, data: []const u8, diag: *ParseDiagnostic) !Document {
+        var parser = libyaml.Parser.init() catch {
+            diag.setMessage("could not initialize libyaml parser");
+            return error.Failed;
+        };
+        defer parser.deinit();
+
+        parser.setInputString(data);
+
+        var builder = Value.Builder.init(allocator) catch {
+            diag.setMessage("could not initialize value builder: out of memory");
+            return error.Failed;
+        };
+        // The builder's arena is handed over to the Document on success only.
+        errdefer builder.deinit();
+
+        var docseen = false;
+        while (true) {
+            var event: libyaml.Event = undefined;
+            parser.parse(&event) catch {
+                // NOTE(review): the message slice aliases memory owned by libyaml and
+                // is read by the caller after `parser.deinit()`; presumably libyaml
+                // problem strings are static literals -- confirm.
+                diag.set(
+                    parser.problem_mark,
+                    if (parser.problem) |problem|
+                        std.mem.span(problem)
+                    else
+                        "parsing failed without a description",
+                );
+                return error.Failed;
+            };
+            defer event.deinit();
+
+            switch (event.type) {
+                .empty => {
+                    diag.set(event.start_mark, "an empty event was generated (???)");
+                    return error.Failed;
+                },
+                .alias => {
+                    diag.set(event.start_mark, "an alias node was encountered (these are not supported)");
+                    return error.Failed;
+                },
+                .document_start => {
+                    if (docseen) {
+                        diag.set(event.start_mark, "A second YAML document was found");
+                        return error.Failed;
+                    }
+                    docseen = true;
+                },
+                .scalar => builder.pushScalar(event.data.scalar.value[0..event.data.scalar.length], diag) catch {
+                    diag.setMark(event.start_mark);
+                    return error.Failed;
+                },
+                .sequence_start => builder.startList(diag) catch {
+                    diag.setMark(event.start_mark);
+                    return error.Failed;
+                },
+                .sequence_end => builder.endList(diag) catch {
+                    diag.setMark(event.start_mark);
+                    return error.Failed;
+                },
+                .mapping_start => builder.startMap(diag) catch {
+                    diag.setMark(event.start_mark);
+                    return error.Failed;
+                },
+                .mapping_end => builder.endMap(diag) catch {
+                    diag.setMark(event.start_mark);
+                    return error.Failed;
+                },
+                .stream_start, .document_end => {},
+                .stream_end => break,
+            }
+        }
+
+        return builder.document() catch {
+            diag.setMessage("The value builder container stack is not empty, somehow?");
+            return error.Failed;
+        };
+    }
+
+    /// Frees the whole document tree and then the arena object itself.
+    pub fn deinit(self: Document) void {
+        const child = self.allocator.child_allocator;
+        self.allocator.deinit();
+        child.destroy(self.allocator);
+    }
+};
+
+/// A YAML node: a scalar string, a list of values, or a string-keyed map.
+pub const Value = union(enum) {
+    scalar: Scalar,
+    list: List,
+    map: Map,
+
+    /// Incrementally assembles a `Value` tree from parser events. All memory
+    /// comes from an arena created in `init`.
+    pub const Builder = struct {
+        /// One open container; `.root` is the sentinel at the bottom of the stack.
+        pub const Stack = union(enum) {
+            root,
+            list: std.ArrayListUnmanaged(Value),
+            map: struct {
+                /// Key whose value has not arrived yet; null when a key is expected next.
+                lastkey: ?Scalar = null,
+                map: Map,
+            },
+        };
+
+        allocator: std.mem.Allocator,
+        container_stack: std.ArrayListUnmanaged(Stack),
+        root: Value,
+
+        /// Creates the arena (allocated from `child_allocator`) and seeds the stack
+        /// with `.root`. Nothing is leaked if any step runs out of memory.
+        pub fn init(child_allocator: std.mem.Allocator) std.mem.Allocator.Error!Builder {
+            const arena = try child_allocator.create(std.heap.ArenaAllocator);
+            errdefer child_allocator.destroy(arena);
+            arena.* = std.heap.ArenaAllocator.init(child_allocator);
+            errdefer arena.deinit();
+            const allocator = arena.allocator();
+
+            var stack = try std.ArrayListUnmanaged(Stack).initCapacity(allocator, 1);
+            stack.appendAssumeCapacity(.root);
+
+            return .{
+                .allocator = allocator,
+                .container_stack = stack,
+                .root = .{ .scalar = "" },
+            };
+        }
+
+        // this should only be run on failure.
+        pub fn deinit(self: Builder) void {
+            const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(self.allocator.ptr));
+            const alloc = arena.child_allocator;
+            arena.deinit();
+            alloc.destroy(arena);
+        }
+
+        /// Hands the finished tree (and ownership of the arena) to a `Document`.
+        /// Fails if any container is still open.
+        pub fn document(self: *Builder) !Document {
+            if (self.container_stack.getLast() != .root)
+                return error.Failed;
+
+            return .{
+                .root = self.root,
+                .allocator = @ptrCast(@alignCast(self.allocator.ptr)),
+            };
+        }
+
+        /// Copies `value` into the arena, recording a message in `diag` on failure.
+        fn dupeScalar(self: *Builder, value: Scalar, diag: *ParseDiagnostic) !Scalar {
+            return self.allocator.dupe(u8, value) catch {
+                diag.setMessage("could not duplicate scalar (out of memory)");
+                return error.Failed;
+            };
+        }
+
+        /// Adds a scalar to the innermost open container. Inside a map, scalars
+        /// alternate between key and value.
+        fn pushScalar(self: *Builder, value: Scalar, diag: *ParseDiagnostic) !void {
+            const duped = try self.dupeScalar(value, diag);
+            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
+                .root => self.root = .{ .scalar = duped },
+                .list => |*builder| builder.append(self.allocator, .{ .scalar = duped }) catch {
+                    diag.setMessage("could not append scalar to list (out of memory)");
+                    return error.Failed;
+                },
+                .map => |*builder| {
+                    if (builder.lastkey) |key| {
+                        builder.map.put(self.allocator, key, .{ .scalar = duped }) catch {
+                            diag.setMessage("could not set map value (out of memory)");
+                            return error.Failed;
+                        };
+                        builder.lastkey = null;
+                    } else {
+                        builder.map.put(self.allocator, duped, undefined) catch {
+                            diag.setMessage("could not set map key (out of memory)");
+                            return error.Failed;
+                        };
+                        builder.lastkey = duped;
+                    }
+                },
+            }
+        }
+
+        /// Opens a new list; values pushed next are collected into it until `endList`.
+        fn startList(self: *Builder, diag: *ParseDiagnostic) !void {
+            self.container_stack.append(self.allocator, .{ .list = .{} }) catch {
+                diag.setMessage("could not add list to stack: out of memory");
+                return error.Failed;
+            };
+        }
+
+        /// Closes the innermost list and stores it in its parent container.
+        fn endList(self: *Builder, diag: *ParseDiagnostic) !void {
+            // Inspect before popping so a mismatched event leaves the stack intact.
+            if (self.container_stack.getLast() != .list) {
+                diag.setMessage("list ended when a list was not the top container");
+                return error.Failed;
+            }
+            var top = self.container_stack.pop();
+
+            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
+                .root => self.root = .{
+                    .list = top.list.toOwnedSlice(self.allocator) catch {
+                        diag.setMessage("could not take ownership of list");
+                        return error.Failed;
+                    },
+                },
+                .list => |*builder| builder.append(self.allocator, .{
+                    .list = top.list.toOwnedSlice(self.allocator) catch {
+                        diag.setMessage("could not take ownership of list");
+                        return error.Failed;
+                    },
+                }) catch {
+                    diag.setMessage("could not append list to list");
+                    return error.Failed;
+                },
+                .map => |*builder| {
+                    if (builder.lastkey) |key| {
+                        builder.map.put(self.allocator, key, .{
+                            .list = top.list.toOwnedSlice(self.allocator) catch {
+                                diag.setMessage("could not take ownership of list");
+                                return error.Failed;
+                            },
+                        }) catch {
+                            diag.setMessage("could not put list in map");
+                            return error.Failed;
+                        };
+                        builder.lastkey = null;
+                    } else {
+                        diag.setMessage("found a list masquerading as a map key (only scalar keys are supported)");
+                        return error.Failed;
+                    }
+                },
+            }
+        }
+
+        /// Opens a new map; keys and values pushed next go into it until `endMap`.
+        fn startMap(self: *Builder, diag: *ParseDiagnostic) !void {
+            self.container_stack.append(self.allocator, .{ .map = .{ .map = .{} } }) catch {
+                diag.setMessage("could not add map to stack: out of memory");
+                return error.Failed;
+            };
+        }
+
+        /// Closes the innermost map and stores it in its parent container.
+        fn endMap(self: *Builder, diag: *ParseDiagnostic) !void {
+            // Inspect before popping so a mismatched event leaves the stack intact.
+            if (self.container_stack.getLast() != .map) {
+                diag.setMessage("map ended when a map was not the top container");
+                return error.Failed;
+            }
+            const top = self.container_stack.pop();
+
+            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
+                .root => self.root = .{ .map = top.map.map },
+                .list => |*builder| builder.append(
+                    self.allocator,
+                    .{ .map = top.map.map },
+                ) catch {
+                    diag.setMessage("could not append map to list");
+                    return error.Failed;
+                },
+                .map => |*builder| {
+                    if (builder.lastkey) |key| {
+                        builder.map.put(self.allocator, key, .{ .map = top.map.map }) catch {
+                            diag.setMessage("could not put map in map");
+                            return error.Failed;
+                        };
+                        builder.lastkey = null;
+                    } else {
+                        diag.setMessage("found a map masquerading as a map key (only scalar keys are supported)");
+                        return error.Failed;
+                    }
+                },
+            }
+        }
+    };
+};