//! Line-oriented document parser. Lines produced by the tokenizer are folded
//! into a tree of `Value`s owned by a `Document`'s arena. Inline "flow"
//! lists and maps are handled by a separate character-level state machine,
//! `Parser.parseFlow`.

const std = @import("std");

const buffers = @import("./linebuffer.zig");
const tokenizer = @import("./tokenizer.zig");
const Value = @import("./parser/value.zig").Value;

/// Location and message describing the most recent problem. A pointer to the
/// parser's instance is handed to the line tokenizer in `Parser.parseBuffer`.
pub const Diagnostics = struct {
    row: usize,
    span: struct { absolute: usize, line_offset: usize, length: usize },
    message: []const u8,
};

/// Every error the parser can return: the parser's own set, whatever the
/// tokenizer reports, and allocation failure.
///
/// As used in this file:
/// - `UnexpectedIndent`: a line is indented where no indent is allowed.
/// - `UnexpectedValue`: a line's contents do not fit the container being built.
/// - `ExtraContent`: a line follows a document that is already complete.
/// - `EmptyDocument`: no content lines and `default_object` is `.fail`.
/// - `DuplicateKey`: a repeated map key while `dupe_behavior` is `.fail`.
/// - `BadState`, `BadToken`: malformed flow list/map text (see `parseFlow`).
/// - `Fail`: an empty map value had to be synthesized but no key was pending.
pub const Error = error{
    UnexpectedIndent,
    UnexpectedValue,
    ExtraContent,
    EmptyDocument,
    DuplicateKey,
    BadMapEntry,
    BadState,
    BadToken,
    Fail,
} || tokenizer.Error || std.mem.Allocator.Error;

/// What to do when a map key is seen a second time (see `putMapGetValue`).
pub const DuplicateKeyBehavior = enum {
    /// Keep the value that is already stored.
    use_first,
    /// Overwrite the stored value with the new one.
    use_last,
    /// Return `error.DuplicateKey`.
    fail,
};

/// Root value to produce when the input contains no content lines at all.
pub const DefaultObject = enum {
    scalar,
    string,
    list,
    map,
    /// Return `error.EmptyDocument` instead of producing a root value.
    fail,
};

/// Top-level parser mode: nothing parsed yet, in the middle of a container or
/// multi-line string, or finished (any further content line is an error).
const ParseState = enum { initial, value, done };

/// A parsed document. All values are allocated from `arena`.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create an empty document backed by `alloc`. `root` is left undefined
    /// until the parser assigns it.
    pub fn init(alloc: std.mem.Allocator) Document {
        return .{
            .arena = std.heap.ArenaAllocator.init(alloc),
            .root = undefined,
        };
    }

    /// Forward to the root value's `printDebug`.
    pub fn printDebug(self: Document) void {
        return self.root.printDebug();
    }

    /// Release the arena and therefore every value in the document.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};

pub const Parser = struct {
    /// Backing allocator for the arena of each parsed `Document`.
    allocator: std.mem.Allocator,
    dupe_behavior: DuplicateKeyBehavior = .fail,
    default_object: DefaultObject = .fail,
    diagnostics: Diagnostics = .{
        .row = 0,
        .span = .{ .absolute = 0, .line_offset = 0, .length = 0 },
        .message = "all is well",
    },

    /// Bundle of parse state. `parseBuffer` in this file keeps the equivalent
    /// state in local variables and does not use this type.
    pub const State = struct {
        pub const Stack = std.ArrayList(*Value);

        document: Document,
        value_stack: Stack,
        state: enum { initial, value, done } = .initial,
        expect_shift: tokenizer.ShiftDirection = .none,
        dangling_key: ?[]const u8 = null,

        pub fn init(alloc: std.mem.Allocator) State {
            return .{
                .document = Document.init(alloc),
                .value_stack = Stack.init(alloc),
            };
        }

        /// Frees the value stack only; the document is not released here.
        pub fn deinit(self: State) void {
            self.value_stack.deinit();
        }
    };

    /// Parse a complete in-memory `buffer` into a `Document`.
    ///
    /// The caller owns the returned document and releases it with
    /// `Document.deinit`. If an error is returned, the partially built
    /// document has already been freed.
    ///
    /// `stack` holds pointers to the containers (or multi-line string)
    /// currently being filled; its last element is where new content goes.
    /// `expect_shift` records the indentation change required of the next
    /// line, and `dangling_key` holds a map key that appeared on its own line
    /// and is still waiting for its value.
    pub fn parseBuffer(self: *Parser, buffer: []const u8) Error!Document {
        var document = Document.init(self.allocator);
        errdefer document.deinit();
        const arena_alloc = document.arena.allocator();

        var state: ParseState = .initial;
        var expect_shift: tokenizer.ShiftDirection = .none;
        var dangling_key: ?[]const u8 = null;
        var stack = std.ArrayList(*Value).init(arena_alloc);
        defer stack.deinit();

        var tok: tokenizer.LineTokenizer(buffers.FixedLineBuffer) = .{
            .buffer = buffers.FixedLineBuffer.init(buffer),
            .diagnostics = &self.diagnostics,
        };

        while (try tok.next()) |line| {
            if (line.contents == .comment) continue;

            var flip = true;
            var flop = false;
            // this is needed to give us a second go round when the line is dedented.
            // `continue :flipflop` re-runs the body against the new stack tip with
            // flop == true; falling out of the switch sets flip = false and ends
            // the loop.
            flipflop: while (flip) : (flop = true) {
                switch (state) {
                    .initial => {
                        if (line.shift == .indent) return error.UnexpectedIndent;

                        switch (line.contents) {
                            // we filter out comments above
                            .comment => unreachable,
                            .in_line => |in_line| switch (in_line) {
                                // empty scalars are only emitted for a list_item or a map_item
                                .empty => unreachable,
                                .scalar => |str| {
                                    document.root = try Value.fromScalar(arena_alloc, str);
                                    // this is a cheesy hack. If the document consists
                                    // solely of a scalar, the finalizer will try to
                                    // chop a line ending off of it, so we need to add
                                    // a sacrificial padding character to avoid
                                    // chopping off something that matters.
                                    //
                                    // NOTE(review): the finalizer at the end of this
                                    // function does nothing once state is .done, so
                                    // this padding does not appear to be removed
                                    // anywhere in this function. This also accesses
                                    // the `.string` field of a value built by
                                    // `fromScalar` -- confirm against value.zig.
                                    try document.root.string.append(' ');
                                    state = .done;
                                },
                                .line_string, .space_string => |str| {
                                    document.root = try Value.fromString(arena_alloc, str);
                                    try document.root.string.append(in_line.lineEnding());
                                    try stack.append(&document.root);
                                    state = .value;
                                },
                                .flow_list => |str| {
                                    document.root = try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior);
                                    state = .done;
                                },
                                .flow_map => |str| {
                                    document.root = try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior);
                                    state = .done;
                                },
                            },
                            .list_item => |value| {
                                document.root = Value.newList(arena_alloc);
                                try stack.append(&document.root);
                                state = .value;

                                switch (value) {
                                    .empty => expect_shift = .indent,
                                    .scalar => |str| try document.root.list.append(try Value.fromScalar(arena_alloc, str)),
                                    .line_string, .space_string => |str| try document.root.list.append(try Value.fromString(arena_alloc, str)),
                                    .flow_list => |str| try document.root.list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                    .flow_map => |str| try document.root.list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                }
                            },
                            .map_item => |pair| {
                                document.root = Value.newMap(arena_alloc);
                                try stack.append(&document.root);
                                state = .value;

                                const dupekey = try arena_alloc.dupe(u8, pair.key);
                                switch (pair.val) {
                                    .empty => {
                                        expect_shift = .indent;
                                        // If the key is on its own line, we don't have
                                        // an associated value until we parse the next
                                        // line. We need to store a reference to this
                                        // key somewhere until we can consume the
                                        // value. More parser state to lug along.
                                        dangling_key = dupekey;
                                    },
                                    .scalar => |str| try document.root.map.put(dupekey, try Value.fromScalar(arena_alloc, str)),
                                    .line_string, .space_string => |str| try document.root.map.put(dupekey, try Value.fromString(arena_alloc, str)),
                                    .flow_list => |str| try document.root.map.put(dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                    .flow_map => |str| try document.root.map.put(dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                }
                            },
                        }
                    },
                    .value => switch (stack.getLast().*) {
                        // these three states are never reachable here. flow_list and
                        // flow_map are parsed with a separate state machine. These
                        // value types can only be present by themselves as the first
                        // line of the document, in which case the document consists
                        // only of that single line: this parser jumps immediately into
                        // the .done state, bypassing the .value state in which this
                        // switch is embedded.
                        .scalar, .flow_list, .flow_map => unreachable,
                        .string => |*string| {
                            if (line.shift == .indent)
                                return error.UnexpectedIndent;

                            if (!flop and line.shift == .dedent) {
                                // kick off the last trailing space or newline
                                _ = string.pop();

                                var dedent_depth = line.shift.dedent;
                                while (dedent_depth > 0) : (dedent_depth -= 1)
                                    _ = stack.pop();

                                continue :flipflop;
                            }

                            switch (line.contents) {
                                .comment => unreachable,
                                .in_line => |in_line| switch (in_line) {
                                    .empty => unreachable,
                                    .line_string, .space_string => |str| {
                                        try string.appendSlice(str);
                                        try string.append(in_line.lineEnding());
                                    },
                                    else => return error.UnexpectedValue,
                                },
                                else => return error.UnexpectedValue,
                            }
                        },
                        .list => |*list| {
                            // detect that the previous item was actually empty
                            //
                            //    -
                            //    - something
                            //
                            // the first line here creates the expect_shift, but the second line
                            // is a valid continuation of the list despite not being indented
                            if (!flop and (expect_shift == .indent and line.shift != .indent))
                                try list.append(Value.newScalar(arena_alloc));

                            // Consider:
                            //
                            //    -
                            //      own-line scalar
                            //    - inline scalar
                            //
                            // the own-line scalar will not push the stack but the next list item will be a dedent
                            if (!flop and line.shift == .dedent) {
                                // if line.shift.dedent is 1 and we're expecting it, the stack will not be popped,
                                // but we will continue loop flipflop. However, flop will be set to true on the next
                                // trip (by the loop's continue expression), so this if prong will not be run again.
                                var dedent_depth = line.shift.dedent - @intFromBool(expect_shift == .dedent);

                                while (dedent_depth > 0) : (dedent_depth -= 1)
                                    _ = stack.pop();

                                continue :flipflop;
                            }

                            switch (line.contents) {
                                .comment => unreachable,
                                .in_line => |in_line| {
                                    // assert that this line has been indented. this is required for an inline value when
                                    // the stack is in list mode.
                                    if (expect_shift != .indent or line.shift != .indent)
                                        return error.UnexpectedValue;

                                    expect_shift = .dedent;
                                    switch (in_line) {
                                        .empty => unreachable,
                                        .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
                                        .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                        .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                        .line_string, .space_string => |str| {
                                            // string pushes the stack
                                            const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str));
                                            try stack.append(new_string);

                                            try new_string.string.append(in_line.lineEnding());
                                            expect_shift = .none;
                                        },
                                    }
                                },
                                .list_item => |value| {
                                    if (flop or (line.shift == .none or line.shift == .dedent)) {
                                        expect_shift = .none;
                                        switch (value) {
                                            .empty => expect_shift = .indent,
                                            .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
                                            .line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
                                            .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                            .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                        }
                                    } else if (line.shift == .indent) {
                                        if (expect_shift != .indent) return error.UnexpectedIndent;

                                        // an indented list item opens a nested list; push it and
                                        // reprocess this same line against the new tip.
                                        const new_list = try appendListGetValue(list, Value.newList(arena_alloc));
                                        try stack.append(new_list);
                                        expect_shift = .none;
                                        continue :flipflop;
                                    } else unreachable;
                                },
                                .map_item => {
                                    // this prong cannot be hit on dedent in a valid way.
                                    //
                                    //    -
                                    //      map: value
                                    //      second: value
                                    //    third: value
                                    //
                                    // dedenting back to the list stack level requires list_item

                                    if (line.shift != .indent)
                                        return error.UnexpectedValue;

                                    // push a nested map and reprocess this same line against it.
                                    const new_map = try appendListGetValue(list, Value.newMap(arena_alloc));
                                    try stack.append(new_map);
                                    expect_shift = .none;
                                    continue :flipflop;
                                },
                            }
                        },
                        .map => |*map| {
                            // detect that the previous item was actually empty
                            //
                            //    foo:
                            //    bar: baz
                            //
                            // the first line here creates the expect_shift, but the second line
                            // is a valid continuation of the map despite not being indented
                            if (!flop and (expect_shift == .indent and line.shift != .indent)) {
                                try putMap(
                                    map,
                                    dangling_key orelse return error.Fail,
                                    Value.newScalar(arena_alloc),
                                    self.dupe_behavior,
                                );
                                dangling_key = null;
                            }

                            if (!flop and line.shift == .dedent) {
                                // an expected single dedent (after an own-line value) does not
                                // pop the stack; see the matching comment in the list prong.
                                var dedent_depth = line.shift.dedent - @intFromBool(expect_shift == .dedent);

                                while (dedent_depth > 0) : (dedent_depth -= 1)
                                    _ = stack.pop();

                                continue :flipflop;
                            }

                            switch (line.contents) {
                                .comment => unreachable,
                                .in_line => |in_line| {
                                    // assert that this line has been indented. this is required for an inline value when
                                    // the stack is in map mode.
                                    if (expect_shift != .indent or line.shift != .indent or dangling_key == null)
                                        return error.UnexpectedValue;

                                    expect_shift = .dedent;

                                    switch (in_line) {
                                        .empty => unreachable,
                                        .scalar => |str| try putMap(map, dangling_key.?, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
                                        .flow_list => |str| try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
                                        .flow_map => |str| {
                                            try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior);
                                        },
                                        .line_string, .space_string => |str| {
                                            // string pushes the stack
                                            const new_string = try putMapGetValue(map, dangling_key.?, try Value.fromString(arena_alloc, str), self.dupe_behavior);
                                            try new_string.string.append(in_line.lineEnding());
                                            try stack.append(new_string);
                                            expect_shift = .none;
                                        },
                                    }

                                    dangling_key = null;
                                },
                                .list_item => {
                                    // this prong cannot be hit on dedent in a valid way.
                                    //
                                    //    map:
                                    //      - value
                                    //    - invalid
                                    //
                                    // dedenting back to the map stack level requires map_item

                                    if (expect_shift != .indent or line.shift != .indent or dangling_key == null)
                                        return error.UnexpectedValue;

                                    // the pending key's value is a nested list; push it and
                                    // reprocess this same line against it.
                                    const new_list = try putMapGetValue(map, dangling_key.?, Value.newList(arena_alloc), self.dupe_behavior);
                                    try stack.append(new_list);
                                    dangling_key = null;
                                    expect_shift = .none;
                                    continue :flipflop;
                                },
                                .map_item => |pair| {
                                    if (flop or (line.shift == .none or line.shift == .dedent)) {
                                        expect_shift = .none;
                                        const dupekey = try arena_alloc.dupe(u8, pair.key);
                                        switch (pair.val) {
                                            .empty => {
                                                expect_shift = .indent;
                                                dangling_key = dupekey;
                                            },
                                            .scalar => |str| try putMap(map, dupekey, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
                                            .line_string, .space_string => |str| try putMap(map, dupekey, try Value.fromString(arena_alloc, str), self.dupe_behavior),
                                            .flow_list => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
                                            .flow_map => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior),
                                        }
                                    } else if (line.shift == .indent) {
                                        if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;

                                        // the pending key's value is a nested map; push it and
                                        // reprocess this same line against it.
                                        const new_map = try putMapGetValue(map, dangling_key.?, Value.newMap(arena_alloc), self.dupe_behavior);
                                        try stack.append(new_map);
                                        dangling_key = null;
                                        continue :flipflop;
                                    } else unreachable;
                                },
                            }
                        },
                    },
                    .done => return error.ExtraContent,
                }

                // this is specifically performed at the end of the loop body so that
                // `continue :flipflop` skips setting it.
                flip = false;
            }
        }

        // finalize: tie off whatever was still open when the input ended.
        switch (state) {
            .initial => switch (self.default_object) {
                .scalar => document.root = .{ .scalar = std.ArrayList(u8).init(arena_alloc) },
                .string => document.root = .{ .string = std.ArrayList(u8).init(arena_alloc) },
                .list => document.root = Value.newList(arena_alloc),
                .map => document.root = Value.newMap(arena_alloc),
                .fail => return error.EmptyDocument,
            },
            .value => switch (stack.getLast().*) {
                // remove the final trailing newline or space
                .scalar, .string => |*string| _ = string.popOrNull(),
                // if we have a dangling -, attach an empty string to it
                .list => |*list| if (expect_shift == .indent) try list.append(Value.newScalar(arena_alloc)),
                // if we have a dangling "key:", attach an empty string to it
                .map => |*map| if (dangling_key) |dk| try putMap(map, dk, Value.newScalar(arena_alloc), self.dupe_behavior),
                .flow_list, .flow_map => {},
            },
            .done => {},
        }

        return document;
    }

    /// Stack of containers currently open while parsing flow syntax.
    const FlowStack: type = std.ArrayList(*Value);

    /// Return the innermost open flow container, or `error.BadState` if the
    /// stack is empty.
    inline fn getStackTip(stack: FlowStack) Error!*Value {
        if (stack.items.len == 0) return error.BadState;
        return stack.items[stack.items.len - 1];
    }

    /// Close the innermost flow container and return the state the parser
    /// should resume in: a separator is expected in the parent container, or
    /// `.done` if the root was just closed. Returns `error.BadState` if the
    /// stack was already empty or the parent is not a flow container.
    inline fn popStack(stack: *FlowStack) Error!FlowParseState {
        if (stack.popOrNull() == null)
            return error.BadState;

        const parent = stack.getLastOrNull() orelse return .done;

        return switch (parent.*) {
            .flow_list => .want_list_separator,
            .flow_map => .want_map_separator,
            else => return error.BadState,
        };
    }

    /// States of the character-level flow parser in `parseFlow`.
    const FlowParseState = enum {
        want_list_item,
        consuming_list_item,
        want_list_separator,
        want_map_key,
        consuming_map_key,
        want_map_value,
        consuming_map_value,
        want_map_separator,
        done,
    };

    /// Parse the text of a flow list or flow map into a `Value`.
    ///
    /// `root_type` must be `.flow_list` or `.flow_map`; anything else returns
    /// `error.BadState`. Parsing starts in the state that already expects the
    /// first item (or key) of the root container, so a `[` or `{` in
    /// `contents` opens a *nested* container -- presumably the caller strips
    /// the root's opening bracket; verify against the tokenizer.
    ///
    /// Values and duplicated keys are allocated with `alloc` and are not
    /// freed here on error, so an arena allocator is the expected choice.
    /// Returns `error.BadToken` for a character that is not allowed at that
    /// position, `error.BadState` for unbalanced or trailing input, and
    /// `error.DuplicateKey` per `dupe_behavior`.
    pub fn parseFlow(
        alloc: std.mem.Allocator,
        contents: []const u8,
        root_type: Value.TagType,
        dupe_behavior: DuplicateKeyBehavior,
    ) Error!Value {
        var root: Value = switch (root_type) {
            .flow_list => Value.newFlowList(alloc),
            .flow_map => Value.newFlowMap(alloc),
            else => return error.BadState,
        };
        var state: FlowParseState = switch (root_type) {
            .flow_list => .want_list_item,
            .flow_map => .want_map_key,
            else => unreachable,
        };
        var stack = try FlowStack.initCapacity(alloc, 1);
        // the stack is scratch space only: the returned root does not refer to
        // it, so release it on every exit path rather than leaking it when
        // `alloc` is not an arena.
        defer stack.deinit();
        stack.appendAssumeCapacity(&root);

        // used to distinguish between [] and [ ], and it also tracks
        // a continuous value between different states
        var item_start: usize = 0;
        var dangling_key: ?[]const u8 = null;

        charloop: for (contents, 0..) |char, idx| {
            switch (state) {
                .want_list_item => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // empty value
                        const tip = try getStackTip(stack);
                        try tip.flow_list.append(Value.newScalar(alloc));
                        item_start = idx + 1;
                    },
                    '{' => {
                        const tip = try getStackTip(stack);

                        const new_map = try Parser.appendListGetValue(
                            &tip.flow_list,
                            Value.newFlowMap(alloc),
                        );

                        item_start = idx;
                        try stack.append(new_map);
                        state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(stack);

                        const new_list = try Parser.appendListGetValue(
                            &tip.flow_list,
                            Value.newFlowList(alloc),
                        );

                        item_start = idx + 1;
                        try stack.append(new_list);
                        state = .want_list_item;
                    },
                    ']' => {
                        const finished = stack.getLastOrNull() orelse return error.BadState;
                        // a trailing separator or interior whitespace yields a final
                        // empty item; a bare `[]` yields none.
                        if (finished.flow_list.items.len > 0 or idx > item_start)
                            try finished.flow_list.append(Value.newScalar(alloc));
                        state = try popStack(&stack);
                    },
                    else => {
                        item_start = idx;
                        state = .consuming_list_item;
                    },
                },
                .consuming_list_item => switch (char) {
                    ',' => {
                        const tip = try getStackTip(stack);

                        try tip.flow_list.append(
                            try Value.fromScalar(alloc, contents[item_start..idx]),
                        );
                        item_start = idx + 1;

                        state = .want_list_item;
                    },
                    ']' => {
                        const finished = stack.getLastOrNull() orelse return error.BadState;
                        try finished.flow_list.append(
                            try Value.fromScalar(alloc, contents[item_start..idx]),
                        );
                        state = try popStack(&stack);
                    },
                    else => continue :charloop,
                },
                .want_list_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        item_start = idx;
                        state = .want_list_item;
                    },
                    ']' => state = try popStack(&stack),
                    else => return error.BadToken,
                },
                .want_map_key => switch (char) {
                    ' ', '\t' => continue :charloop,
                    // forbid these characters so that flow dictionary keys cannot start
                    // with characters that regular dictionary keys cannot start with
                    // (even though they're unambiguous in this specific context).
                    '{', '[', '#', '-', '>', '|', ',' => return error.BadToken,
                    ':' => {
                        // we have an empty map key
                        dangling_key = "";
                        state = .want_map_value;
                    },
                    '}' => state = try popStack(&stack),
                    else => {
                        item_start = idx;
                        state = .consuming_map_key;
                    },
                },
                .consuming_map_key => switch (char) {
                    ':' => {
                        dangling_key = try alloc.dupe(u8, contents[item_start..idx]);
                        state = .want_map_value;
                    },
                    else => continue :charloop,
                },
                .want_map_value => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // the value is an empty string and the map continues
                        const tip = try getStackTip(stack);
                        try Parser.putMap(
                            &tip.flow_map,
                            dangling_key.?,
                            Value.newScalar(alloc),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(stack);

                        const new_list = try Parser.putMapGetValue(
                            &tip.flow_map,
                            dangling_key.?,
                            Value.newFlowList(alloc),
                            dupe_behavior,
                        );

                        try stack.append(new_list);
                        dangling_key = null;
                        item_start = idx + 1;
                        state = .want_list_item;
                    },
                    '{' => {
                        const tip = try getStackTip(stack);

                        const new_map = try Parser.putMapGetValue(
                            &tip.flow_map,
                            dangling_key.?,
                            Value.newFlowMap(alloc),
                            dupe_behavior,
                        );

                        try stack.append(new_map);
                        dangling_key = null;
                        state = .want_map_key;
                    },
                    '}' => {
                        // the value is an empty string and this map is closed
                        const tip = try getStackTip(stack);
                        try Parser.putMap(
                            &tip.flow_map,
                            dangling_key.?,
                            Value.newScalar(alloc),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        state = try popStack(&stack);
                    },
                    else => {
                        item_start = idx;
                        state = .consuming_map_value;
                    },
                },
                .consuming_map_value => switch (char) {
                    ',', '}' => |term| {
                        const tip = try getStackTip(stack);
                        try Parser.putMap(
                            &tip.flow_map,
                            dangling_key.?,
                            try Value.fromScalar(alloc, contents[item_start..idx]),
                            dupe_behavior,
                        );
                        dangling_key = null;
                        state = .want_map_key;
                        if (term == '}') state = try popStack(&stack);
                    },
                    else => continue :charloop,
                },
                .want_map_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => state = .want_map_key,
                    '}' => state = try popStack(&stack),
                    else => return error.BadToken,
                },
                // the root value was closed but there are characters remaining
                // in the buffer
                .done => return error.BadState,
            }
        }
        // we ran out of characters while still in the middle of an object
        if (state != .done) return error.BadState;

        return root;
    }

    /// Append `value` to `list` and return a pointer to the stored element.
    /// The pointer refers to the list's backing storage, so it is only valid
    /// until the list is next appended to.
    inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value {
        try list.append(value);
        return &list.items[list.items.len - 1];
    }

    /// Insert `value` under `key`, applying `dupe_behavior` if the key exists.
    inline fn putMap(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void {
        _ = try putMapGetValue(map, key, value, dupe_behavior);
    }

    /// Insert `value` under `key` and return a pointer to the stored value.
    ///
    /// If the key already exists: `.fail` returns `error.DuplicateKey`,
    /// `.use_first` keeps the existing value (the returned pointer refers to
    /// it and `value` is discarded), and `.use_last` overwrites it.
    inline fn putMapGetValue(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value {
        const gop = try map.getOrPut(key);

        if (gop.found_existing)
            switch (dupe_behavior) {
                .fail => return error.DuplicateKey,
                .use_first => {},
                .use_last => gop.value_ptr.* = value,
            }
        else
            gop.value_ptr.* = value;

        return gop.value_ptr;
    }
};