From b18326a07ae0ddf64ee529296424c0130ea9f1c1 Mon Sep 17 00:00:00 2001 From: torque Date: Mon, 18 Sep 2023 00:01:36 -0700 Subject: [PATCH] config: start doing some code cleanup I was pretty sloppy with the code organization while writing out the state machines because my focus was on thinking through the parsing process and logic there. However, the code was not in good shape to continue implementing code features (not document features). This is the first of probably several commits that will work on cleaning up some things. Value has been promoted to the top-level namespace, and Document has an initializer function. Referencing Value.List and Value.Map is much cleaner now. Type aliases are good. For the flow parser, `popStack` does not have to access anything except the current stack. This can be passed in as a parameter. This means that `parse` is ready to be refactored to take a buffer and an allocator. The main next steps for code improvement are: 1. reentrant/streaming parser. I am planning to leave it as line-buffered, though I could go further. Line-buffered has two main benefits: the tokenizer doesn't need to be refactored significantly, and the flow parser doesn't need to be made reentrant. I may reevaluate this as I am implementing it, however, as those changes may be simpler than I think. 2. Actually implement the error diagnostics info. I have some skeleton structure in place for this, so it should just be doing the work of getting it hooked up. 3. Parse into object. Metaprogramming, let's go. It will be interesting to try to do this non-recursively, as well (curious to see if it results in code bloat). 4. Object to Document. This is probably going to be annoying, since there are a variety of edge cases that will have to be handled. And lots of objects that cannot be represented as documents. 5. Serialize Document. 
One thing the parser does not preserve is whether a Value was flow-style or not, so it will be impossible to do round-trip formatting preservation. That's currently a non-goal, and I haven't decided yet if flow-style output should be based on some heuristic (number/length of values in container) or just never emitted. Lack of round-trip preservation does make using this as a general purpose config format a lot more dubious, so I will have to think about this some more. 6. Document to JSON. Why not? I will hand roll this and it will suck. And then everything will be perfect and never need to be touched again. --- src/config.zig | 293 +++++++++++++++++++++++++------------------------ 1 file changed, 147 insertions(+), 146 deletions(-) diff --git a/src/config.zig b/src/config.zig index 1fdc449..9a51809 100644 --- a/src/config.zig +++ b/src/config.zig @@ -350,6 +350,103 @@ pub const LineTokenizer = struct { } }; +pub const Value = union(enum) { + pub const String = std.ArrayList(u8); + pub const Map = std.StringHashMap(Value); + pub const List = std.ArrayList(Value); + + string: String, + list: List, + map: Map, + + pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value { + var res: Value = .{ .string = try String.initCapacity(alloc, input.len) }; + res.string.appendSliceAssumeCapacity(input); + return res; + } + + pub inline fn newString(alloc: std.mem.Allocator) Value { + return .{ .string = String.init(alloc) }; + } + + pub inline fn newList(alloc: std.mem.Allocator) Value { + return .{ .list = List.init(alloc) }; + } + + pub inline fn newMap(alloc: std.mem.Allocator) Value { + return .{ .map = Map.init(alloc) }; + } + + pub fn printDebug(self: Value) void { + self.printRecursive(0); + std.debug.print("\n", .{}); + } + + fn printRecursive(self: Value, indent: usize) void { + switch (self) { + .string => |str| { + if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| { + var lines = std.mem.splitScalar(u8, str.items, '\n'); + std.debug.print("\n", 
.{}); + while (lines.next()) |line| { + std.debug.print( + "{[empty]s: >[indent]}{[line]s}{[nl]s}", + .{ + .empty = "", + .indent = indent, + .line = line, + .nl = if (lines.peek() == null) "" else "\n", + }, + ); + } + } else { + std.debug.print("{s}", .{str.items}); + } + }, + .list => |list| { + if (list.items.len == 0) { + std.debug.print("[]", .{}); + return; + } + + std.debug.print("[\n", .{}); + for (list.items, 0..) |value, idx| { + std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx }); + value.printRecursive(indent + 2); + std.debug.print(",\n", .{}); + } + std.debug.print( + "{[empty]s: >[indent]}]", + .{ .empty = "", .indent = indent }, + ); + }, + .map => |map| { + if (map.count() == 0) { + std.debug.print("{{}}", .{}); + return; + } + + std.debug.print("{{\n", .{}); + + var iter = map.iterator(); + + while (iter.next()) |entry| { + std.debug.print( + "{[empty]s: >[indent]}{[key]s}: ", + .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* }, + ); + entry.value_ptr.printRecursive(indent + 4); + std.debug.print(",\n", .{}); + } + std.debug.print( + "{[empty]s: >[indent]}}}", + .{ .empty = "", .indent = indent }, + ); + }, + } + } +}; + pub const Parser = struct { allocator: std.mem.Allocator, dupe_behavior: DuplicateKeyBehavior = .fail, @@ -383,102 +480,6 @@ pub const Parser = struct { fail, }; - pub const Map = std.StringHashMap; - pub const List = std.ArrayList; - - pub const Value = union(enum) { - string: std.ArrayList(u8), - list: List(Value), - map: Map(Value), - - pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value { - var res: Value = .{ .string = try std.ArrayList(u8).initCapacity(alloc, input.len) }; - res.string.appendSliceAssumeCapacity(input); - return res; - } - - pub inline fn newString(alloc: std.mem.Allocator) Value { - return .{ .string = std.ArrayList(u8).init(alloc) }; - } - - pub inline fn newList(alloc: std.mem.Allocator) Value { - return .{ .list = 
List(Value).init(alloc) }; - } - - pub inline fn newMap(alloc: std.mem.Allocator) Value { - return .{ .map = Map(Value).init(alloc) }; - } - - pub fn printDebug(self: Value) void { - self.printRecursive(0); - std.debug.print("\n", .{}); - } - - fn printRecursive(self: Value, indent: usize) void { - switch (self) { - .string => |str| { - if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| { - var lines = std.mem.splitScalar(u8, str.items, '\n'); - std.debug.print("\n", .{}); - while (lines.next()) |line| { - std.debug.print( - "{[empty]s: >[indent]}{[line]s}{[nl]s}", - .{ - .empty = "", - .indent = indent, - .line = line, - .nl = if (lines.peek() == null) "" else "\n", - }, - ); - } - } else { - std.debug.print("{s}", .{str.items}); - } - }, - .list => |list| { - if (list.items.len == 0) { - std.debug.print("[]", .{}); - return; - } - - std.debug.print("[\n", .{}); - for (list.items, 0..) |value, idx| { - std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx }); - value.printRecursive(indent + 2); - std.debug.print(",\n", .{}); - } - std.debug.print( - "{[empty]s: >[indent]}]", - .{ .empty = "", .indent = indent }, - ); - }, - .map => |map| { - if (map.count() == 0) { - std.debug.print("{{}}", .{}); - return; - } - - std.debug.print("{{\n", .{}); - - var iter = map.iterator(); - - while (iter.next()) |entry| { - std.debug.print( - "{[empty]s: >[indent]}{[key]s}: ", - .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* }, - ); - entry.value_ptr.printRecursive(indent + 4); - std.debug.print(",\n", .{}); - } - std.debug.print( - "{[empty]s: >[indent]}}}", - .{ .empty = "", .indent = indent }, - ); - }, - } - } - }; - pub const ParseState = enum { initial, value, @@ -489,6 +490,13 @@ pub const Parser = struct { arena: std.heap.ArenaAllocator, root: Value, + pub fn init(alloc: std.mem.Allocator) Document { + return .{ + .arena = std.heap.ArenaAllocator.init(alloc), + .root = undefined, + }; + } + pub fn 
printDebug(self: Document) void { return self.root.printDebug(); } @@ -499,10 +507,7 @@ pub const Parser = struct { }; pub fn parseBuffer(self: *Parser, buffer: []const u8) Error!Document { - var document: Document = .{ - .arena = std.heap.ArenaAllocator.init(self.allocator), - .root = undefined, - }; + var document = Document.init(self.allocator); errdefer document.deinit(); const arena_alloc = document.arena.allocator(); @@ -556,7 +561,7 @@ pub const Parser = struct { }, }, .list_item => |value| { - document.root = .{ .list = List(Value).init(arena_alloc) }; + document.root = .{ .list = Value.List.init(arena_alloc) }; try stack.append(&document.root); switch (value) { @@ -579,7 +584,7 @@ pub const Parser = struct { } }, .map_item => |pair| { - document.root = .{ .map = Map(Value).init(arena_alloc) }; + document.root = .{ .map = Value.Map.init(arena_alloc) }; try stack.append(&document.root); switch (pair.val) { @@ -713,7 +718,7 @@ pub const Parser = struct { if (expect_shift != .indent) return error.UnexpectedIndent; - const new_list = try appendListGetValue(list, .{ .list = List(Value).init(arena_alloc) }); + const new_list = try appendListGetValue(list, .{ .list = Value.List.init(arena_alloc) }); try stack.append(new_list); expect_shift = .none; @@ -739,7 +744,7 @@ pub const Parser = struct { if (line.indent != .indent) return error.UnexpectedValue; - const new_map = try appendListGetValue(list, .{ .map = Map(Value).init(arena_alloc) }); + const new_map = try appendListGetValue(list, .{ .map = Value.Map.init(arena_alloc) }); try stack.append(new_map); expect_shift = .none; @@ -822,7 +827,7 @@ pub const Parser = struct { if (expect_shift != .indent or line.indent != .indent or dangling_key == null) return error.UnexpectedValue; - const new_list = try putMapGetValue(map, dangling_key.?, .{ .list = List(Value).init(arena_alloc) }, self.dupe_behavior); + const new_list = try putMapGetValue(map, dangling_key.?, .{ .list = Value.List.init(arena_alloc) }, 
self.dupe_behavior); try stack.append(new_list); dangling_key = null; @@ -851,7 +856,7 @@ pub const Parser = struct { .indent => { if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue; - const new_map = try putMapGetValue(map, dangling_key.?, .{ .map = Map(Value).init(arena_alloc) }, self.dupe_behavior); + const new_map = try putMapGetValue(map, dangling_key.?, .{ .map = Value.Map.init(arena_alloc) }, self.dupe_behavior); try stack.append(new_map); dangling_key = null; @@ -882,8 +887,8 @@ pub const Parser = struct { switch (state) { .initial => switch (self.default_object) { .string => document.root = .{ .string = std.ArrayList(u8).init(arena_alloc) }, - .list => document.root = .{ .list = List(Value).init(arena_alloc) }, - .map => document.root = .{ .map = Map(Value).init(arena_alloc) }, + .list => document.root = .{ .list = Value.List.init(arena_alloc) }, + .map => document.root = .{ .map = Value.Map.init(arena_alloc) }, .fail => return error.EmptyDocument, }, .value => switch (stack.getLast().*) { @@ -920,16 +925,16 @@ pub const Parser = struct { return try parser.parse(dupe_behavior); } - inline fn appendListGetValue(list: *List(Value), value: Value) Error!*Value { + inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value { try list.append(value); return &list.items[list.items.len - 1]; } - inline fn putMap(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void { + inline fn putMap(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void { _ = try putMapGetValue(map, key, value, dupe_behavior); } - inline fn putMapGetValue(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value { + inline fn putMapGetValue(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value { const gop = try map.getOrPut(key); if (gop.found_existing) @@ -989,8 +994,6 @@ pub const 
Parser = struct { }; pub const FlowParser = struct { - pub const Value = Parser.Value; - const FlowStackItem = struct { value: *Value, // lists need this. maps do also for keys and values. @@ -1057,35 +1060,17 @@ pub const FlowParser = struct { stack.items[stack.items.len - 1].item_start = start; } - inline fn popStack(self: *FlowParser, idx: usize) Parser.Error!void { - const finished = self.stack.popOrNull() orelse return error.BadState; - if (finished.value.* == .list) { - // this is not valid if we are in the want_list_separator state because - // there is no trailing comma in that state + inline fn popStack(self: *FlowParser) Parser.Error!ParseState { + if (self.stack.popOrNull() == null) + return error.BadState; - if (self.state == .want_list_item and (finished.value.list.items.len > 0 or idx > finished.item_start)) - try finished.value.list.append( - try Parser.valueFromString(self.alloc, ""), - ) - else if (self.state == .consuming_list_item) - try finished.value.list.append( - try Parser.valueFromString( - self.alloc, - self.buffer[finished.item_start..idx], - ), - ); - } + const parent = self.stack.getLastOrNull() orelse return .done; - const parent = self.stack.getLastOrNull() orelse { - self.state = .done; - return; - }; - - switch (parent.value.*) { - .list => self.state = .want_list_separator, - .map => self.state = .want_map_separator, + return switch (parent.value.*) { + .list => .want_list_separator, + .map => .want_map_separator, else => return error.BadState, - } + }; } pub fn parse(self: *FlowParser, dupe_behavior: Parser.DuplicateKeyBehavior) Parser.Error!Value { @@ -1143,7 +1128,14 @@ pub const FlowParser = struct { try self.stack.append(.{ .value = new_list, .item_start = idx + 1 }); self.state = .want_list_item; }, - ']' => try self.popStack(idx), + ']' => { + const finished = self.stack.getLastOrNull() orelse return error.BadState; + if (finished.value.list.items.len > 0 or idx > finished.item_start) + try finished.value.list.append( + try 
Parser.valueFromString(self.alloc, ""), + ); + self.state = try self.popStack(); + }, else => { try setStackItemStart(self.stack, idx); self.state = .consuming_list_item; @@ -1160,7 +1152,16 @@ pub const FlowParser = struct { self.state = .want_list_item; }, - ']' => try self.popStack(idx), + ']' => { + const finished = self.stack.getLastOrNull() orelse return error.BadState; + try finished.value.list.append( + try Parser.valueFromString( + self.alloc, + self.buffer[finished.item_start..idx], + ), + ); + self.state = try self.popStack(); + }, else => continue :charloop, }, .want_list_separator => switch (char) { @@ -1169,7 +1170,7 @@ pub const FlowParser = struct { try setStackItemStart(self.stack, idx); self.state = .want_list_item; }, - ']' => try self.popStack(idx), + ']' => self.state = try self.popStack(), else => return error.BadToken, }, .want_map_key => switch (char) { @@ -1183,7 +1184,7 @@ pub const FlowParser = struct { dangling_key = ""; self.state = .want_map_value; }, - '}' => try self.popStack(idx), + '}' => self.state = try self.popStack(), else => { try setStackItemStart(self.stack, idx); self.state = .consuming_map_key; @@ -1251,7 +1252,7 @@ pub const FlowParser = struct { ); dangling_key = null; - try self.popStack(idx); + self.state = try self.popStack(); }, else => { try setStackItemStart(self.stack, idx); @@ -1269,14 +1270,14 @@ pub const FlowParser = struct { ); dangling_key = null; self.state = .want_map_key; - if (term == '}') try self.popStack(idx); + if (term == '}') self.state = try self.popStack(); }, else => continue :charloop, }, .want_map_separator => switch (char) { ' ', '\t' => continue :charloop, ',' => self.state = .want_map_key, - '}' => try self.popStack(idx), + '}' => self.state = try self.popStack(), else => return error.BadToken, }, // the root value was closed but there are characters remaining