value: implement parsing to objects

There are still some untested code paths here, but this does seem to work for nontrivial objects, so, woohoo. It's worth noting that this is a recursive implementation (which seems silly after I hand-rolled the non-recursive main parser). The thinking is that if you have an object nested deeply enough that you run out of stack space here, you probably shouldn't be converting it directly to an object. I may revisit this, though I am still not 100% certain how straightforward it would be to make this non-recursive with all the weird comptime objects. Basically, the "parse stack" would have to be created at comptime.
2023-10-03 23:17:37 -07:00
parent 0028092a4e
commit 34ec58e0d2
3 changed files with 321 additions and 22 deletions
--- a/src/parser/state.zig
+++ b/src/parser/state.zig
@@ -5,28 +5,9 @@ const Error = @import("../parser.zig").Error;
 const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
 const Options = @import("../parser.zig").Options;
 const Diagnostics = @import("../parser.zig").Diagnostics;
+const Document = @import("./value.zig").Document;
 const Value = @import("./value.zig").Value;

-pub const Document = struct {
-    arena: std.heap.ArenaAllocator,
-    root: Value,
-
-    pub fn init(alloc: std.mem.Allocator) Document {
-        return .{
-            .arena = std.heap.ArenaAllocator.init(alloc),
-            .root = undefined,
-        };
-    }
-
-    pub fn printDebug(self: Document) void {
-        return self.root.printDebug();
-    }
-
-    pub fn deinit(self: Document) void {
-        self.arena.deinit();
-    }
-};
-
 const FlowParseState = enum {
    want_list_item,
    consuming_list_item,
--- a/src/parser/value.zig
+++ b/src/parser/value.zig
@@ -1,5 +1,45 @@
 const std = @import("std");

+const Options = @import("../parser.zig").Options;
+
+/// A parsed document: the root `Value` plus the arena used for conversions.
+pub const Document = struct {
+    arena: std.heap.ArenaAllocator,
+    root: Value,
+
+    /// Creates a document with a fresh arena over `alloc`; `root` is left undefined.
+    pub fn init(alloc: std.mem.Allocator) Document {
+        return .{ .arena = std.heap.ArenaAllocator.init(alloc), .root = undefined };
+    }
+
+    /// Converts `root` into a `T`, allocating from this document's arena.
+    /// NOTE(review): the result copies `arena` by value; confirm who must `deinit`.
+    pub fn convertTo(self: *Document, comptime T: type, options: Options) !Parsed(T) {
+        const converted = try self.root.convertTo(T, self.arena.allocator(), options);
+        return .{ .value = converted, .arena = self.arena };
+    }
+
+    pub fn printDebug(self: Document) void {
+        self.root.printDebug();
+    }
+
+    /// Releases every allocation made from the arena.
+    pub fn deinit(self: Document) void {
+        self.arena.deinit();
+    }
+};
+
+/// Pairs a converted value of type `T` with the arena that `deinit` releases.
+pub fn Parsed(comptime T: type) type {
+    return struct {
+        value: T,
+        arena: std.heap.ArenaAllocator,
+        pub fn deinit(self: @This()) void {
+            self.arena.deinit();
+        }
+    };
+}
+
 pub const Value = union(enum) {
    pub const String = std.ArrayList(u8);
    pub const Map = std.StringArrayHashMap(Value);
@@ -13,6 +53,219 @@ pub const Value = union(enum) {
    map: Map,
    flow_map: Map,

+    /// Converts this value into an instance of `T`, recursing into child values
+    /// for pointers, arrays, structs, unions and optionals.
+    ///
+    /// `allocator` backs slices built from lists and single-item pointers. A
+    /// `[]u8` produced from a scalar or string aliases this value's own storage
+    /// instead of copying it.
+    ///
+    /// Struct, enum and union types that declare `deserializeNice` are handed
+    /// the value, allocator and options and perform the conversion themselves.
+    ///
+    /// Returns `error.BadValue` when the value does not fit `T`; errors from
+    /// `std.fmt.parseInt`, `std.fmt.parseFloat` and allocation are passed through.
+    pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
+        switch (@typeInfo(T)) {
+            .Void => {
+                switch (self) {
+                    .scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
+                    .string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
+                    else => return error.BadValue,
+                }
+            },
+            .Bool => {
+                switch (self) {
+                    inline .scalar, .string => |str, tag| {
+                        if (tag == .string and !options.coerce_strings) return error.BadValue;
+                        for (options.boolean_strings.truthy) |check|
+                            if (std.mem.eql(u8, str.items, check)) return true;
+                        for (options.boolean_strings.falsy) |check|
+                            if (std.mem.eql(u8, str.items, check)) return false;
+
+                        return error.BadValue;
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Int, .ComptimeInt => {
+                switch (self) {
+                    inline .scalar, .string => |str, tag| {
+                        if (tag == .string and !options.coerce_strings) return error.BadValue;
+                        // base 0 lets parseInt infer the radix from a 0x/0o/0b prefix
+                        return try std.fmt.parseInt(T, str.items, 0);
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Float, .ComptimeFloat => {
+                switch (self) {
+                    inline .scalar, .string => |str, tag| {
+                        if (tag == .string and !options.coerce_strings) return error.BadValue;
+                        return try std.fmt.parseFloat(T, str.items);
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Pointer => |ptr| switch (ptr.size) {
+                .Slice => {
+                    // TODO: There is ambiguity here because a document expecting a list
+                    //       of u8 could parse a string instead. Introduce a special
+                    //       type to use for this? the problem is that it becomes
+                    //       invasive into downstream code. Ultimately this should
+                    //       probably be solved in the zig stdlib or similar.
+                    // TODO: This also doesn't handle sentinels properly.
+                    switch (self) {
+                        .scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
+                        .list, .flow_list => |lst| {
+                            var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
+                            errdefer result.deinit();
+                            for (lst.items) |item| {
+                                result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
+                            }
+                            return result.toOwnedSlice();
+                        },
+                        else => return error.BadValue,
+                    }
+                },
+                .One => {
+                    // allocate the pointee, then fill it by converting into the child type
+                    const result = try allocator.create(ptr.child);
+                    errdefer allocator.destroy(result);
+                    result.* = try self.convertTo(ptr.child, allocator, options);
+                    return result;
+                },
+                else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)), // do not support many or C item pointers.
+            },
+            .Array => |arr| {
+                // TODO: There is ambiguity here because a document expecting a list
+                //       of u8 could parse a string instead. Introduce a special
+                //       type to use for this? the problem is that it becomes
+                //       invasive into downstream code. Ultimately this should
+                //       probably be solved in the zig stdlib or similar.
+                // TODO: This also doesn't handle sentinels properly.
+                switch (self) {
+                    .scalar, .string => |str| {
+                        if (arr.child == u8 and str.items.len == arr.len) {
+                            var result: T = undefined;
+                            @memcpy(&result, str.items);
+                            return result;
+                        } else return error.BadValue;
+                    },
+                    .list, .flow_list => |lst| {
+                        // a list of any other length cannot fill the array exactly
+                        if (lst.items.len != arr.len) return error.BadValue;
+                        // this may result in a big stack allocation, which is not ideal
+                        var result: T = undefined;
+                        for (lst.items, 0..) |item, idx| {
+                            result[idx] = try item.convertTo(arr.child, allocator, options);
+                        }
+                        return result;
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Struct => |stt| {
+                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
+                    return T.deserializeNice(self, allocator, options);
+
+                if (stt.is_tuple) {
+                    switch (self) {
+                        .list, .flow_list => |list| {
+                            if (list.items.len != stt.fields.len) return error.BadValue;
+                            var result: T = undefined;
+                            inline for (stt.fields, 0..) |field, idx| {
+                                result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
+                            }
+                            return result;
+                        },
+                        else => return error.BadValue,
+                    }
+                }
+
+                switch (self) {
+                    .map, .flow_map => |map| {
+                        var result: T = undefined;
+                        // number of struct fields that were present in the map
+                        var matched: usize = 0;
+
+                        inline for (stt.fields) |field| {
+                            if (map.get(field.name)) |value| {
+                                @field(result, field.name) = try value.convertTo(field.type, allocator, options);
+                                matched += 1;
+                            } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
+                                @field(result, field.name) = null;
+                            } else return error.BadValue;
+                        }
+
+                        // map keys are unique, so a count mismatch means the data holds
+                        // keys that match no struct field (i.e. extra fields)
+                        if (!options.ignore_extra_fields and matched != map.count()) return error.BadValue;
+
+                        return result;
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Enum => {
+                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
+                    return T.deserializeNice(self, allocator, options);
+
+                switch (self) {
+                    inline .scalar, .string => |str, tag| {
+                        if (tag == .string and !options.coerce_strings) return error.BadValue;
+                        if (std.meta.stringToEnum(T, str.items)) |value| return value;
+                        if (options.allow_numeric_enums) {
+                            const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
+                                return error.BadValue;
+                            return std.meta.intToEnum(T, parsed) catch error.BadValue;
+                        }
+                        return error.BadValue;
+                    },
+                    else => return error.BadValue,
+                }
+            },
+            .Union => |unn| {
+                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
+                    return T.deserializeNice(self, allocator, options);
+
+                if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));
+
+                switch (self) {
+                    .map, .flow_map => |map| {
+                        // a union may not ever be deserialized from a map with more than one value
+                        if (map.count() != 1) return error.BadValue;
+                        const key = map.keys()[0];
+                        inline for (unn.fields) |field| {
+                            if (std.mem.eql(u8, key, field.name))
+                                return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
+                        }
+                        return error.BadValue;
+                    },
+                    // TODO: if the field is a 0 width type like void, we could parse it
+                    //       directly from a scalar/string value (i.e. a name with no
+                    //       corresponding value)
+                    else => return error.BadValue,
+                }
+            },
+            .Optional => |opt| {
+                switch (self) {
+                    inline .scalar, .string => |str, tag| {
+                        // `.string` values only count as null when string coercion is on
+                        if (tag == .scalar or options.coerce_strings) {
+                            for (options.null_strings) |check|
+                                if (std.mem.eql(u8, str.items, check)) return null;
+                        }
+                    },
+                    // lists and maps are never null; convert them as the child type
+                    else => {},
+                }
+                return try self.convertTo(opt.child, allocator, options);
+            },
+            else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)),
+        }
+    }
+
    pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .scalar, input);
    }