config: migrate flow parser into the main parser object

I will probably turn this into a method of the ParserState struct soon, so expect even more code churn in this area. But for now, here we are.
config: remove some duplication in the parser
2023-09-23 13:29:49 -07:00 · 2023-09-23 01:07:04 -07:00 · 2023-09-22 01:01:46 -07:00 · 2023-09-22 01:01:46 -07:00 · 2023-09-22 01:00:17 -07:00 · 2023-09-22 01:00:17 -07:00
1 changed files with 301 additions and 394 deletions
--- a/src/config.zig
+++ b/src/config.zig
@@ -566,8 +566,10 @@ pub const Parser = struct {
        EmptyDocument,
        DuplicateKey,
        BadMapEntry,
+        BadState,
+        BadToken,
        Fail,
-    } || LineTokenizer(FixedLineBuffer).Error || FlowParser.Error || std.mem.Allocator.Error;
+    } || LineTokenizer(FixedLineBuffer).Error || std.mem.Allocator.Error;

    pub const DuplicateKeyBehavior = enum {
        use_first,
@@ -679,11 +681,11 @@ pub const Parser = struct {
                                    state = .value;
                                },
                                .flow_list => |str| {
-                                    document.root = try parseFlowList(arena_alloc, str, self.dupe_behavior);
+                                    document.root = try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior);
                                    state = .done;
                                },
                                .flow_map => |str| {
-                                    document.root = try parseFlowMap(arena_alloc, str, self.dupe_behavior);
+                                    document.root = try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior);
                                    state = .done;
                                },
                            },
@@ -705,11 +707,11 @@ pub const Parser = struct {
                                        state = .value;
                                    },
                                    .flow_list => |str| {
-                                        try document.root.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior));
+                                        try document.root.list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior));
                                        state = .value;
                                    },
                                    .flow_map => |str| {
-                                        try document.root.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior));
+                                        try document.root.list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior));
                                        state = .value;
                                    },
                                }
@@ -743,11 +745,11 @@ pub const Parser = struct {
                                        state = .value;
                                    },
                                    .flow_list => |str| {
-                                        try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior));
+                                        try document.root.map.put(pair.key, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior));
                                        state = .value;
                                    },
                                    .flow_map => |str| {
-                                        try document.root.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior));
+                                        try document.root.map.put(pair.key, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior));
                                        state = .value;
                                    },
                                }
@@ -757,7 +759,7 @@ pub const Parser = struct {
                    .value => switch (stack.getLast().*) {
                        // these three states are never reachable here. flow_list and
                        // flow_map are parsed with a separate state machine. These
-                        // value tyeps can only be present by themselves as the first
+                        // value types can only be present by themselves as the first
                        // line of the document, in which case the document consists
                        // only of that single line: this parser jumps immediately into
                        // the .done state, bypassing the .value state in which this
@@ -799,7 +801,7 @@ pub const Parser = struct {
                            //
                            // the first line here creates the expect_shift, but the second line
                            // is a valid continuation of the list despite not being indented
-                            if (expect_shift == .indent and line.indent != .indent)
+                            if (!flop and (expect_shift == .indent and line.indent != .indent))
                                try list.append(Value.newScalar(arena_alloc));

                            // Consider:
@@ -833,52 +835,38 @@ pub const Parser = struct {
                                    switch (in_line) {
                                        .empty => unreachable,
                                        .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
-                                        .flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                        .flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
+                                        .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
+                                        .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                        .line_string, .space_string => |str| {
                                            // string pushes the stack
                                            const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str));
+                                            try stack.append(new_string);

                                            try new_string.string.append(in_line.lineEnding());
-
-                                            try stack.append(new_string);
                                            expect_shift = .none;
                                        },
                                    }
                                },
                                .list_item => |value| {
-                                    switch (line.indent) {
-                                        // for dedent, the stack has already been popped, so this should be fine
-                                        .none, .dedent => {
+                                    if (flop or (line.indent == .none or line.indent == .dedent)) {
                                        expect_shift = .none;
                                        switch (value) {
                                            .empty => expect_shift = .indent,
                                            .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
                                            .line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
-                                                .flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                                .flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
+                                            .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
+                                            .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                        }
-                                        },
-                                        // a new list is being created
-                                        .indent => {
-                                            if (expect_shift != .indent)
-                                                return error.UnexpectedIndent;
+                                    } else if (line.indent == .indent) {
+                                        if (expect_shift != .indent) return error.UnexpectedIndent;

                                        const new_list = try appendListGetValue(list, Value.newList(arena_alloc));
                                        try stack.append(new_list);
-
                                        expect_shift = .none;
-                                            switch (value) {
-                                                .empty => expect_shift = .indent,
-                                                .scalar => |str| try new_list.list.append(try Value.fromScalar(arena_alloc, str)),
-                                                .line_string, .space_string => |str| try new_list.list.append(try Value.fromString(arena_alloc, str)),
-                                                .flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                                .flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
-                                            }
+                                        continue :flipflop;
+                                    } else unreachable;
                                },
-                                    }
-                                },
-                                .map_item => |pair| {
+                                .map_item => {
                                    // this prong cannot be hit on dedent in a valid way.
                                    //
                                    //    -
@@ -894,17 +882,7 @@ pub const Parser = struct {
                                    const new_map = try appendListGetValue(list, Value.newMap(arena_alloc));
                                    try stack.append(new_map);
                                    expect_shift = .none;
-
-                                    switch (pair.val) {
-                                        .empty => {
-                                            dangling_key = try arena_alloc.dupe(u8, pair.key);
-                                            expect_shift = .indent;
-                                        },
-                                        .scalar => |str| try new_map.map.put(pair.key, try Value.fromScalar(arena_alloc, str)),
-                                        .line_string, .space_string => |str| try new_map.map.put(pair.key, try Value.fromString(arena_alloc, str)),
-                                        .flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                        .flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
-                                    }
+                                    continue :flipflop;
                                },
                            }
                        },
@@ -916,7 +894,7 @@ pub const Parser = struct {
                            //
                            // the first line here creates the expect_shift, but the second line
                            // is a valid continuation of the map despite not being indented
-                            if (expect_shift == .indent and line.indent != .indent) {
+                            if (!flop and (expect_shift == .indent and line.indent != .indent)) {
                                try putMap(
                                    map,
                                    dangling_key orelse return error.Fail,
@@ -948,9 +926,9 @@ pub const Parser = struct {
                                    switch (in_line) {
                                        .empty => unreachable,
                                        .scalar => |str| try putMap(map, dangling_key.?, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
-                                        .flow_list => |str| try putMap(map, dangling_key.?, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
+                                        .flow_list => |str| try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
                                        .flow_map => |str| {
-                                            try putMap(map, dangling_key.?, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior);
+                                            try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior);
                                        },
                                        .line_string, .space_string => |str| {
                                            // string pushes the stack
@@ -963,7 +941,7 @@ pub const Parser = struct {

                                    dangling_key = null;
                                },
-                                .list_item => |value| {
+                                .list_item => {
                                    // this prong cannot be hit on dedent in a valid way.
                                    //
                                    //    map:
@@ -978,50 +956,30 @@ pub const Parser = struct {
                                    const new_list = try putMapGetValue(map, dangling_key.?, Value.newList(arena_alloc), self.dupe_behavior);
                                    try stack.append(new_list);
                                    dangling_key = null;
-
                                    expect_shift = .none;
-                                    switch (value) {
-                                        .empty => expect_shift = .indent,
-                                        .scalar => |str| try new_list.list.append(try Value.fromScalar(arena_alloc, str)),
-                                        .line_string, .space_string => |str| try new_list.list.append(try Value.fromString(arena_alloc, str)),
-                                        .flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                        .flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
-                                    }
+                                    continue :flipflop;
                                },
                                .map_item => |pair| {
+                                    if (flop or (line.indent == .none or line.indent == .dedent)) {
                                        expect_shift = .none;
-                                    switch (line.indent) {
-                                        // for dedent, the stack has already been popped, so this should be fine
-                                        .none, .dedent => switch (pair.val) {
+                                        switch (pair.val) {
                                            .empty => {
                                                expect_shift = .indent;
                                                dangling_key = try arena_alloc.dupe(u8, pair.key);
                                            },
                                            .scalar => |str| try putMap(map, pair.key, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
                                            .line_string, .space_string => |str| try putMap(map, pair.key, try Value.fromString(arena_alloc, str), self.dupe_behavior),
-                                            .flow_list => |str| try putMap(map, pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
-                                            .flow_map => |str| try putMap(map, pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
-                                        },
-                                        // a new map is being created
-                                        .indent => {
+                                            .flow_list => |str| try putMap(map, pair.key, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
+                                            .flow_map => |str| try putMap(map, pair.key, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior),
+                                        }
+                                    } else if (line.indent == .indent) {
                                        if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;

                                        const new_map = try putMapGetValue(map, dangling_key.?, Value.newMap(arena_alloc), self.dupe_behavior);
                                        try stack.append(new_map);
                                        dangling_key = null;
-
-                                            switch (pair.val) {
-                                                .empty => {
-                                                    expect_shift = .indent;
-                                                    dangling_key = try arena_alloc.dupe(u8, pair.key);
-                                                },
-                                                .scalar => |str| try new_map.map.put(pair.key, try Value.fromScalar(arena_alloc, str)),
-                                                .line_string, .space_string => |str| try new_map.map.put(pair.key, try Value.fromString(arena_alloc, str)),
-                                                .flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
-                                                .flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
-                                            }
-                                        },
-                                    }
+                                        continue :flipflop;
+                                    } else unreachable;
                                },
                            }
                        },
@@ -1057,18 +1015,261 @@ pub const Parser = struct {
        return document;
    }

-    fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
-        var parser = try FlowParser.initList(alloc, contents);
-        defer parser.deinit();
+    const FlowStackItem = struct {
+        value: *Value,
+        // buffer index where the current item starts: lists use it for items, maps for keys and values.
+        item_start: usize = 0,
+    };

-        return try parser.parse(dupe_behavior);
+    const FlowStack: type = std.ArrayList(FlowStackItem);
+
+    inline fn getStackTip(stack: FlowStack) Error!*FlowStackItem {
+        if (stack.items.len == 0) return error.BadState;
+        return &stack.items[stack.items.len - 1];
    }

-    fn parseFlowMap(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
-        var parser = try FlowParser.initMap(alloc, contents);
-        defer parser.deinit();
+    inline fn setStackItemStart(stack: FlowStack, start: usize) Error!void {
+        if (stack.items.len == 0) return error.BadState;
+        stack.items[stack.items.len - 1].item_start = start;
+    }

-        return try parser.parse(dupe_behavior);
+    inline fn popStack(stack: *FlowStack) Error!FlowParseState {
+        if (stack.popOrNull() == null)
+            return error.BadState;
+
+        const parent = stack.getLastOrNull() orelse return .done;
+
+        return switch (parent.value.*) {
+            .flow_list => .want_list_separator,
+            .flow_map => .want_map_separator,
+            else => return error.BadState,
+        };
+    }
+
+    const FlowParseState = enum {
+        want_list_item,
+        consuming_list_item,
+        want_list_separator,
+        want_map_key,
+        consuming_map_key,
+        want_map_value,
+        consuming_map_value,
+        want_map_separator,
+        done,
+    };
+
+    pub fn parseFlow(
+        alloc: std.mem.Allocator,
+        contents: []const u8,
+        root_type: Value.TagType,
+        dupe_behavior: DuplicateKeyBehavior,
+    ) Error!Value {
+        // prime the stack:
+
+        var root: Value = switch (root_type) {
+            .flow_list => Value.newFlowList(alloc),
+            .flow_map => Value.newFlowMap(alloc),
+            else => return error.BadState,
+        };
+        var state: FlowParseState = switch (root_type) {
+            .flow_list => .want_list_item,
+            .flow_map => .want_map_key,
+            else => unreachable,
+        };
+        var stack = try FlowStack.initCapacity(alloc, 1);
+        stack.appendAssumeCapacity(.{ .value = &root });
+        var dangling_key: ?[]const u8 = null;
+
+        charloop: for (contents, 0..) |char, idx| {
+            switch (state) {
+                .want_list_item => switch (char) {
+                    ' ', '\t' => continue :charloop,
+                    ',' => {
+                        // empty value
+                        const tip = try getStackTip(stack);
+                        try tip.value.flow_list.append(Value.newScalar(alloc));
+                        tip.item_start = idx + 1;
+                    },
+                    '{' => {
+                        const tip = try getStackTip(stack);
+
+                        const new_map = try Parser.appendListGetValue(
+                            &tip.value.flow_list,
+                            Value.newFlowMap(alloc),
+                        );
+
+                        tip.item_start = idx;
+                        try stack.append(.{ .value = new_map });
+                        state = .want_map_key;
+                    },
+                    '[' => {
+                        const tip = try getStackTip(stack);
+
+                        const new_list = try Parser.appendListGetValue(
+                            &tip.value.flow_list,
+                            Value.newFlowList(alloc),
+                        );
+
+                        tip.item_start = idx;
+                        try stack.append(.{ .value = new_list, .item_start = idx + 1 });
+                        state = .want_list_item;
+                    },
+                    ']' => {
+                        const finished = stack.getLastOrNull() orelse return error.BadState;
+                        if (finished.value.flow_list.items.len > 0 or idx > finished.item_start)
+                            try finished.value.flow_list.append(Value.newScalar(alloc));
+                        state = try popStack(&stack);
+                    },
+                    else => {
+                        try setStackItemStart(stack, idx);
+                        state = .consuming_list_item;
+                    },
+                },
+                .consuming_list_item => switch (char) {
+                    ',' => {
+                        const tip = try getStackTip(stack);
+
+                        try tip.value.flow_list.append(
+                            try Value.fromScalar(alloc, contents[tip.item_start..idx]),
+                        );
+                        tip.item_start = idx + 1;
+
+                        state = .want_list_item;
+                    },
+                    ']' => {
+                        const finished = stack.getLastOrNull() orelse return error.BadState;
+                        try finished.value.flow_list.append(
+                            try Value.fromScalar(alloc, contents[finished.item_start..idx]),
+                        );
+                        state = try popStack(&stack);
+                    },
+                    else => continue :charloop,
+                },
+                .want_list_separator => switch (char) {
+                    ' ', '\t' => continue :charloop,
+                    ',' => {
+                        try setStackItemStart(stack, idx);
+                        state = .want_list_item;
+                    },
+                    ']' => state = try popStack(&stack),
+                    else => return error.BadToken,
+                },
+                .want_map_key => switch (char) {
+                    ' ', '\t' => continue :charloop,
+                    // forbid these characters so that flow dictionary keys cannot start
+                    // with characters that regular dictionary keys cannot start with
+                    // (even though they're unambiguous in this specific context).
+                    '{', '[', '#', '-', '>', '|', ',' => return error.BadToken,
+                    ':' => {
+                        // we have an empty map key
+                        dangling_key = "";
+                        state = .want_map_value;
+                    },
+                    '}' => state = try popStack(&stack),
+                    else => {
+                        try setStackItemStart(stack, idx);
+                        state = .consuming_map_key;
+                    },
+                },
+                .consuming_map_key => switch (char) {
+                    ':' => {
+                        const tip = try getStackTip(stack);
+                        dangling_key = try alloc.dupe(u8, contents[tip.item_start..idx]);
+
+                        state = .want_map_value;
+                    },
+                    else => continue :charloop,
+                },
+                .want_map_value => switch (char) {
+                    ' ', '\t' => continue :charloop,
+                    ',' => {
+                        const tip = try getStackTip(stack);
+                        try Parser.putMap(
+                            &tip.value.flow_map,
+                            dangling_key.?,
+                            Value.newScalar(alloc),
+                            dupe_behavior,
+                        );
+
+                        dangling_key = null;
+                        state = .want_map_key;
+                    },
+                    '[' => {
+                        const tip = try getStackTip(stack);
+
+                        const new_list = try Parser.putMapGetValue(
+                            &tip.value.flow_map,
+                            dangling_key.?,
+                            Value.newFlowList(alloc),
+                            dupe_behavior,
+                        );
+
+                        try stack.append(.{ .value = new_list, .item_start = idx + 1 });
+                        dangling_key = null;
+                        state = .want_list_item;
+                    },
+                    '{' => {
+                        const tip = try getStackTip(stack);
+
+                        const new_map = try Parser.putMapGetValue(
+                            &tip.value.flow_map,
+                            dangling_key.?,
+                            Value.newFlowMap(alloc),
+                            dupe_behavior,
+                        );
+
+                        try stack.append(.{ .value = new_map });
+                        dangling_key = null;
+                        state = .want_map_key;
+                    },
+                    '}' => {
+                        // the value is an empty string and this map is closed
+                        const tip = try getStackTip(stack);
+                        try Parser.putMap(
+                            &tip.value.flow_map,
+                            dangling_key.?,
+                            Value.newScalar(alloc),
+                            dupe_behavior,
+                        );
+
+                        dangling_key = null;
+                        state = try popStack(&stack);
+                    },
+                    else => {
+                        try setStackItemStart(stack, idx);
+                        state = .consuming_map_value;
+                    },
+                },
+                .consuming_map_value => switch (char) {
+                    ',', '}' => |term| {
+                        const tip = try getStackTip(stack);
+                        try Parser.putMap(
+                            &tip.value.flow_map,
+                            dangling_key.?,
+                            try Value.fromScalar(alloc, contents[tip.item_start..idx]),
+                            dupe_behavior,
+                        );
+                        dangling_key = null;
+                        state = .want_map_key;
+                        if (term == '}') state = try popStack(&stack);
+                    },
+                    else => continue :charloop,
+                },
+                .want_map_separator => switch (char) {
+                    ' ', '\t' => continue :charloop,
+                    ',' => state = .want_map_key,
+                    '}' => state = try popStack(&stack),
+                    else => return error.BadToken,
+                },
+                // the root value was closed but there are characters remaining
+                // in the buffer
+                .done => return error.BadState,
+            }
+        }
+        // we ran out of characters while still in the middle of an object
+        if (state != .done) return error.BadState;
+
+        return root;
    }

    inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value {
@@ -1138,297 +1339,3 @@ pub const Parser = struct {
        });
    }
 };
-
-pub const FlowParser = struct {
-    const FlowStackItem = struct {
-        value: *Value,
-        // lists need this. maps do also for keys and values.
-        item_start: usize = 0,
-    };
-
-    const FlowStack: type = std.ArrayList(FlowStackItem);
-
-    buffer: []const u8,
-    root: Value,
-    alloc: std.mem.Allocator,
-    stack: FlowStack,
-    state: ParseState,
-
-    // make this an ugly state machine parser
-    const ParseState = enum {
-        want_list_item,
-        consuming_list_item,
-        want_list_separator,
-        want_map_key,
-        consuming_map_key,
-        want_map_value,
-        consuming_map_value,
-        want_map_separator,
-        done,
-    };
-
-    const Error = error{
-        BadState,
-        BadToken,
-    } || std.mem.Allocator.Error;
-
-    pub fn initList(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
-        return .{
-            .buffer = buffer,
-            .root = undefined,
-            .alloc = alloc,
-            .stack = undefined,
-            .state = .want_list_item,
-        };
-    }
-
-    pub fn initMap(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
-        return .{
-            .buffer = buffer,
-            .root = undefined,
-            .alloc = alloc,
-            .stack = undefined,
-            .state = .want_map_key,
-        };
-    }
-
-    pub fn deinit(self: *FlowParser) void {
-        self.stack.deinit();
-    }
-
-    inline fn getStackTip(stack: FlowStack) Error!*FlowStackItem {
-        if (stack.items.len == 0) return error.BadState;
-        return &stack.items[stack.items.len - 1];
-    }
-
-    inline fn setStackItemStart(stack: FlowStack, start: usize) Error!void {
-        if (stack.items.len == 0) return error.BadState;
-        stack.items[stack.items.len - 1].item_start = start;
-    }
-
-    inline fn popStack(self: *FlowParser) Parser.Error!ParseState {
-        if (self.stack.popOrNull() == null)
-            return error.BadState;
-
-        const parent = self.stack.getLastOrNull() orelse return .done;
-
-        return switch (parent.value.*) {
-            .flow_list => .want_list_separator,
-            .flow_map => .want_map_separator,
-            else => return error.BadState,
-        };
-    }
-
-    pub fn parse(self: *FlowParser, dupe_behavior: Parser.DuplicateKeyBehavior) Parser.Error!Value {
-        // prime the stack:
-        switch (self.state) {
-            .want_list_item => {
-                self.root = Value.newFlowList(self.alloc);
-                self.stack = try FlowStack.initCapacity(self.alloc, 1);
-                self.stack.appendAssumeCapacity(.{ .value = &self.root });
-            },
-            .want_map_key => {
-                self.root = Value.newFlowMap(self.alloc);
-                self.stack = try FlowStack.initCapacity(self.alloc, 1);
-                self.stack.appendAssumeCapacity(.{ .value = &self.root });
-            },
-            else => {
-                return error.BadState;
-            },
-        }
-
-        var dangling_key: ?[]const u8 = null;
-
-        charloop: for (self.buffer, 0..) |char, idx| {
-            // std.debug.print("{s} => {c}\n", .{ @tagName(self.state), char });
-            switch (self.state) {
-                .want_list_item => switch (char) {
-                    ' ', '\t' => continue :charloop,
-                    ',' => {
-                        // empty value
-                        const tip = try getStackTip(self.stack);
-                        try tip.value.flow_list.append(Value.newScalar(self.alloc));
-                        tip.item_start = idx + 1;
-                    },
-                    '{' => {
-                        const tip = try getStackTip(self.stack);
-
-                        const new_map = try Parser.appendListGetValue(
-                            &tip.value.flow_list,
-                            Value.newFlowMap(self.alloc),
-                        );
-
-                        tip.item_start = idx;
-                        try self.stack.append(.{ .value = new_map });
-                        self.state = .want_map_key;
-                    },
-                    '[' => {
-                        const tip = try getStackTip(self.stack);
-
-                        const new_list = try Parser.appendListGetValue(
-                            &tip.value.flow_list,
-                            Value.newFlowList(self.alloc),
-                        );
-
-                        tip.item_start = idx;
-                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
-                        self.state = .want_list_item;
-                    },
-                    ']' => {
-                        const finished = self.stack.getLastOrNull() orelse return error.BadState;
-                        if (finished.value.flow_list.items.len > 0 or idx > finished.item_start)
-                            try finished.value.flow_list.append(Value.newScalar(self.alloc));
-                        self.state = try self.popStack();
-                    },
-                    else => {
-                        try setStackItemStart(self.stack, idx);
-                        self.state = .consuming_list_item;
-                    },
-                },
-                .consuming_list_item => switch (char) {
-                    ',' => {
-                        const tip = try getStackTip(self.stack);
-
-                        try tip.value.flow_list.append(
-                            try Value.fromScalar(self.alloc, self.buffer[tip.item_start..idx]),
-                        );
-                        tip.item_start = idx + 1;
-
-                        self.state = .want_list_item;
-                    },
-                    ']' => {
-                        const finished = self.stack.getLastOrNull() orelse return error.BadState;
-                        try finished.value.flow_list.append(
-                            try Value.fromScalar(self.alloc, self.buffer[finished.item_start..idx]),
-                        );
-                        self.state = try self.popStack();
-                    },
-                    else => continue :charloop,
-                },
-                .want_list_separator => switch (char) {
-                    ' ', '\t' => continue :charloop,
-                    ',' => {
-                        try setStackItemStart(self.stack, idx);
-                        self.state = .want_list_item;
-                    },
-                    ']' => self.state = try self.popStack(),
-                    else => return error.BadToken,
-                },
-                .want_map_key => switch (char) {
-                    ' ', '\t' => continue :charloop,
-                    // forbid these characters so that flow dictionary keys cannot start
-                    // with characters that regular dictionary keys cannot start with
-                    // (even though they're unambiguous in this specific context).
-                    '{', '[', '#', '>', '|', ',' => return error.BadToken,
-                    ':' => {
-                        // we have an empty map key
-                        dangling_key = "";
-                        self.state = .want_map_value;
-                    },
-                    '}' => self.state = try self.popStack(),
-                    else => {
-                        try setStackItemStart(self.stack, idx);
-                        self.state = .consuming_map_key;
-                    },
-                },
-                .consuming_map_key => switch (char) {
-                    ':' => {
-                        const tip = try getStackTip(self.stack);
-                        dangling_key = try self.alloc.dupe(u8, self.buffer[tip.item_start..idx]);
-
-                        self.state = .want_map_value;
-                    },
-                    else => continue :charloop,
-                },
-                .want_map_value => switch (char) {
-                    ' ', '\t' => continue :charloop,
-                    ',' => {
-                        const tip = try getStackTip(self.stack);
-                        try Parser.putMap(
-                            &tip.value.flow_map,
-                            dangling_key.?,
-                            Value.newScalar(self.alloc),
-                            dupe_behavior,
-                        );
-
-                        dangling_key = null;
-                        self.state = .want_map_key;
-                    },
-                    '[' => {
-                        const tip = try getStackTip(self.stack);
-
-                        const new_list = try Parser.putMapGetValue(
-                            &tip.value.flow_map,
-                            dangling_key.?,
-                            Value.newFlowList(self.alloc),
-                            dupe_behavior,
-                        );
-
-                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
-                        dangling_key = null;
-                        self.state = .want_list_item;
-                    },
-                    '{' => {
-                        const tip = try getStackTip(self.stack);
-
-                        const new_map = try Parser.putMapGetValue(
-                            &tip.value.flow_map,
-                            dangling_key.?,
-                            Value.newFlowMap(self.alloc),
-                            dupe_behavior,
-                        );
-
-                        try self.stack.append(.{ .value = new_map });
-                        dangling_key = null;
-                        self.state = .want_map_key;
-                    },
-                    '}' => {
-                        // the value is an empty string and this map is closed
-                        const tip = try getStackTip(self.stack);
-                        try Parser.putMap(
-                            &tip.value.flow_map,
-                            dangling_key.?,
-                            Value.newScalar(self.alloc),
-                            dupe_behavior,
-                        );
-
-                        dangling_key = null;
-                        self.state = try self.popStack();
-                    },
-                    else => {
-                        try setStackItemStart(self.stack, idx);
-                        self.state = .consuming_map_value;
-                    },
-                },
-                .consuming_map_value => switch (char) {
-                    ',', '}' => |term| {
-                        const tip = try getStackTip(self.stack);
-                        try Parser.putMap(
-                            &tip.value.flow_map,
-                            dangling_key.?,
-                            try Value.fromScalar(self.alloc, self.buffer[tip.item_start..idx]),
-                            dupe_behavior,
-                        );
-                        dangling_key = null;
-                        self.state = .want_map_key;
-                        if (term == '}') self.state = try self.popStack();
-                    },
-                    else => continue :charloop,
-                },
-                .want_map_separator => switch (char) {
-                    ' ', '\t' => continue :charloop,
-                    ',' => self.state = .want_map_key,
-                    '}' => self.state = try self.popStack(),
-                    else => return error.BadToken,
-                },
-                // the root value was closed but there are characters remaining
-                // in the buffer
-                .done => return error.BadState,
-            }
-        }
-        // we ran out of characters while still in the middle of an object
-        if (self.state != .done) return error.BadState;
-
-        return self.root;
-    }
-};
Author	SHA1	Message	Date
torque	96b950755b	config: migrate flow parser into the main parser object I think I am actually going to make this a method of the ParserState struct soon so lol check out my freaking code churn. But here we are.	2023-09-23 13:29:49 -07:00
torque	9c866970f8	config: remove some duplication in the parser There's still a fair amount lurking in here, but I believe this logic is sound. Rather than duplicating the map/list logic under the opposing key, we set the logic up to use the second loop around (this was how dedents worked, and now it also works for indents). I'm not convinced this is as easy to follow, and it did lead me to add some additional unreachables to the code, which should maybe be turned into error returns instead. It does reduce the odds of a code change missing a copied instance, which I think is a good thing.	2023-09-23 01:07:04 -07:00
torque	47f4a1c479	config: dupe map keys I didn't do an exhaustive search, but it seems that the managed hashmaps only allocate space for the structure of the map itself, not its keys or values. This mostly makes sense, but it also means that this was only working due to the fact that I am currently not freeing the input buffer until after iterating through the parse result. Looking through this, I'm also reasonably surprised by how many times this is assigned in the normal parsing vs the flow parsing. There is a lot more repetition in the code of the normal parser, I think because it does not have a granular state machine. It may be worth revisiting the structure to see if a more detailed state machine, like the one used for parsing the flow-style objects, would reduce the amount of code repetition here. I suspect it certainly could be better than it currently is, since it seems unlikely that there really are four different scenarios where we need to be parsing a dictionary key. Taking a quick glance at it, it looks like I could be taking better advantage of the flipflop loop on indent as well as dedent. This might be a bit less efficient due to essentially being less loop unrolling, but it would also potentially make more maintainable code by having less manual repetition.	2023-09-22 01:01:46 -07:00
torque	e9cf908b61	config: use std.StringArrayHashMap for the map type As I was thinking about this, I realized that data serialization is much more of a bear than deserialization. Or, more accurately, trying to make stable round trip serialization a goal puts heavier demands on deserialization, including preserving input order. I think there may be a mountain hiding under this molehill, though, because the goals of having a format that is designed to be handwritten and also machine written are at odds with each other. Right now, the parser does not preserve comments at all. But even if we did (they could easily become a special type of string), comment indentation is ignored. Comments are not directly a child of any other part of the document, they're awkward text that exists interspersed throughout it. With the current design, there are some essentially unsolvable problems, like comments interspersed throughout multiline strings. The string is processed into a single object in the output, so there can't be weird magic data interleaved with it because it loses the concept of being interleaved entirely (this is a bigger issue for space strings, which don't even preserve a unique way to reserialize them. Line strings at least contain a character (the newline) that can appear nowhere else but at a break in the string). Obviously this isn't technically impossible, but it would require a change to the way that values are modeled. And even if we did take the approach of associating a comment with, say, the value that follows it (which I think is a reasonable thing to do, ignoring the interleaved comment situation described above), if software reads in data, changes it, and writes it back out, how do we account for deleted items? Does the comment get deleted with the item? Does it become a dangling comment that just gets shoved somewhere in the document? How are comments that come after everything else in the document handled? 
From a pure data perspective, it's fairly obvious why JSON omits comments: they're trivial to parse, but there's not a strategy for emitting them that will always be correct, especially in a format that doesn't give a hoot about linebreaks. It may be interesting to look at fancy TOML (barf) parsers to see how they handle comments, though I assume the general technique is to store their row position in the original document and track when a line is added or removed. Ultimately, I think the use case of a format to be written by humans and read by computers is still useful. That's my intended use case for this and why I started it, but its application as a configuration file format is probably hamstrung muchly by software not being able to write it back. On the other hand, there's a lot of successful software I use where the config files are not written directly by the software at all, so maybe it's entirely fine to declare this as being out of scope and not worrying about it further. At the very least it's almost certainly less of an issue than erroring on carriage returns. Also the fact that certain keys are simply unrepresentable. As a side note, I guess what they say about commit message length being inversely proportional to the change length is true. Hope you enjoyed the blog over this 5 character change.	2023-09-22 01:01:46 -07:00
torque	6415571d01	config: refactor LineTokenizer to use an internal line buffer The goal here is to support a streaming parser. However, I did decide to leave the flow item parser state machine as fully buffered (i.e. not streaming). This is not JSON and in general documents should be many, shorter lines, so this buffering strategy should work reasonably well. I have not actually tried the streaming implementation of this, yet.	2023-09-22 01:00:17 -07:00
torque	ab580fa80a	config: differentiate fields in Value This makes handling Value very slightly more work, but it provides useful metadata that can be used to perform better conversion and serialization. The motivation behind the "scalar" type is that in general, only scalars can be coerced to other types. For example, a scalar `null` and a string `> null` have the same in-memory representation. If they are treated identically, this precludes unambiguously converting an optional string whose contents are "null". With the two disambiguated, we can choose to convert `null` to the null object and `> null` to a string of contents "null". This ambiguity does not necessarily exist for the standard boolean values `true` and `false`, but it does allow the conversion to be more strict, and it will theoretically result in documents that read more naturally. The motivation behind exposing flow_list and flow_map is that it will allow preserving document formatting round trip (well, this isn't strictly true: single line explicit strings neither remember whether they were line strings or space strings, and they don't remember if they were indented. However, that is much less information to lose). The following formulations will parse to the same indistinguishable value: key: > value key: > value key: \| value key: \| value I think that's okay. It's a lot easier to choose a canonical form for this case than it is for a map/list without any hints regarding its origin.	2023-09-22 01:00:17 -07:00
torque	b18326a07a	config: start doing some code cleanup I was pretty sloppy with the code organization while writing out the state machines because my focus was on thinking through the parsing process and logic there. However, the code was not in good shape to continue implementing code features (not document features). This is the first of probably several commits that will work on cleaning up some things. Value has been promoted to the top level namespace, and Document has an initializer function. Referencing Value.List and Value.Map is much cleaner now. Type aliases are good. For the flow parser, `popStack` does not have to access anything except the current stack. This can be passed in as a parameter. This means that `parse` is ready to be refactored to take a buffer and an allocator. The main next steps for code improvement are: 1. reentrant/streaming parser. I am planning to leave it as line-buffered, though I could go further. Line-buffered has two main benefits: the tokenizer doesn't need to be refactored significantly, and the flow parser doesn't need to be made reentrant. I may reevaluate this as I am implementing it, however, as those changes may be simpler than I think. 2. Actually implement the error diagnostics info. I have some skeleton structure in place for this, so it should just be doing the work of getting it hooked up. 3. Parse into object. Metaprogramming, let's go. It will be interesting to try to do this non-recursively, as well (curious to see if it results in code bloat). 4. Object to Document. This is probably going to be annoying, since there are a variety of edge cases that will have to be handled. And lots of objects that cannot be represented as documents. 5. Serialize Document. One thing the parser does not preserve is whether a Value was flow-style or not, so it will be impossible to do round-trip formatting preservation. 
That's currently a non-goal, and I haven't decided yet if flow-style output should be based on some heuristic (number/length of values in container) or just never emitted. Lack of round-trip preservation does make using this as a general purpose config format a lot more dubious, so I will have to think about this some more. 6. Document to JSON. Why not? I will hand roll this and it will suck. And then everything will be perfect and never need to be touched again.	2023-09-22 00:53:26 -07:00
torque	cd05097a78	config: add terminated strings This was the final feature I wanted to add to the format. Also some other things have been cleaned up a little bit (for example, the inline parser does not need the dangling key to be attached to each stack level just like the normal parser doesn't). There was also an off-by-one error that bugged out detecting the pathological case of a flow list consisting of only an empty string (`[ ]`, not to be mistaken for the empty list `[]`). Mixed multiline strings are a bit confusing but internally consistent. > what character does this string end with? \| ends with a newline character because that's the style of the second-to-last line. However, seeing \| last makes my brain think it should end with a space. The reason it ends with a newline is because our concatenation strategy consists of appending to the string early (as soon as a line is added) rather than lazily. This is a tradeoff, though. While lazy appending would make this result more intuitive (the string would end with a space) and it would allow us to remove the self-proclaimed cheesy hack, it would make the opposite boundary condition confusing: > \| what character does this string start with? With lazy appending, this string would start with a space (despite > making it look like it should have a leading newline). While both of these are likely to be uncommon edge cases, it doesn't seem we can have it both ways. Of the two options, I think the current logic is a little bit more clear.	2023-09-22 00:53:26 -07:00