const std = @import("std");

const Options = @import("../parser.zig").Options;

/// A parsed document tree together with the arena its nodes are allocated from.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create a document whose arena is backed by `alloc`. `root` is left
    /// undefined and must be assigned before any other method is called.
    pub fn init(alloc: std.mem.Allocator) Document {
        return .{
            .arena = std.heap.ArenaAllocator.init(alloc),
            .root = undefined,
        };
    }

    /// Convert the document tree into a value of type `T`, allocating from the
    /// document's arena.
    ///
    /// The arena is copied by value into the result, so this `Document` and the
    /// returned `Parsed(T)` refer to the same allocations: call `deinit` on only
    /// one of them.
    pub fn convertTo(self: *Document, comptime T: type, options: Options) !Parsed(T) {
        return .{
            .value = try self.root.convertTo(T, self.arena.allocator(), options),
            .arena = self.arena,
        };
    }

    /// Print the document tree to stderr.
    pub fn printDebug(self: Document) void {
        return self.root.printDebug();
    }

    /// Free everything allocated from the document's arena.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};

/// Returns a struct type pairing a converted value of type `T` with the arena
/// that backs its allocations.
pub fn Parsed(comptime T: type) type {
    return struct {
        value: T,
        arena: std.heap.ArenaAllocator,

        /// Free everything allocated from the arena, including any memory
        /// referenced by `value`.
        pub fn deinit(self: @This()) void {
            self.arena.deinit();
        }
    };
}

/// A node of the parsed document tree.
pub const Value = union(enum) {
    pub const String = std.ArrayList(u8);
    pub const Map = std.StringArrayHashMap(Value);
    pub const List = std.ArrayList(Value);
    pub const TagType = std.meta.Tag(Value);

    scalar: String,
    string: String,
    list: List,
    flow_list: List,
    map: Map,
    flow_map: Map,

    /// Convert this node into a value of type `T`.
    ///
    /// Any memory needed by the result (slices, pointees) is allocated from
    /// `allocator`. Byte slices produced from a scalar or string node are NOT
    /// copied: they alias the node's own storage.
    ///
    /// Returns `error.BadValue` when the node's shape or contents do not match
    /// `T`. Errors from `std.fmt.parseInt` / `std.fmt.parseFloat` and from the
    /// allocator are propagated unchanged. Structs, enums and unions that
    /// declare a `deserializeNice` function are converted by calling it.
    pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
        switch (@typeInfo(T)) {
            .Void => {
                // void is represented by an empty scalar (or an empty string
                // when string coercion is enabled).
                switch (self) {
                    .scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
                    .string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
                    else => return error.BadValue,
                }
            },
            .Bool => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        for (options.boolean_strings.truthy) |check|
                            if (std.mem.eql(u8, str.items, check)) return true;
                        for (options.boolean_strings.falsy) |check|
                            if (std.mem.eql(u8, str.items, check)) return false;

                        return error.BadValue;
                    },
                    else => return error.BadValue,
                }
            },
            .Int, .ComptimeInt => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        // base 0 lets parseInt detect 0x/0o/0b prefixes
                        return try std.fmt.parseInt(T, str.items, 0);
                    },
                    else => return error.BadValue,
                }
            },
            .Float, .ComptimeFloat => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        return try std.fmt.parseFloat(T, str.items);
                    },
                    else => return error.BadValue,
                }
            },
            .Pointer => |ptr| switch (ptr.size) {
                .Slice => {
                    // TODO: There is ambiguity here because a document expecting a list
                    //       of u8 could parse a string instead. Introduce a special
                    //       type to use for this? the problem is that it becomes
                    //       invasive into downstream code. Ultimately this should
                    //       probably be solved in the zig stdlib or similar.
                    // TODO: This also doesn't handle sentinels properly.
                    switch (self) {
                        // the returned slice aliases the node's buffer
                        .scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
                        .list, .flow_list => |lst| {
                            var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
                            errdefer result.deinit();
                            for (lst.items) |item| {
                                result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
                            }
                            return result.toOwnedSlice();
                        },
                        else => return error.BadValue,
                    }
                },
                .One => {
                    const result = try allocator.create(ptr.child);
                    errdefer allocator.destroy(result);
                    result.* = try self.convertTo(ptr.child, allocator, options);
                    return result;
                },
                // do not support many or C item pointers.
                else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)),
            },
            .Array => |arr| {
                // TODO: There is ambiguity here because a document expecting a list
                //       of u8 could parse a string instead. Introduce a special
                //       type to use for this? the problem is that it becomes
                //       invasive into downstream code. Ultimately this should
                //       probably be solved in the zig stdlib or similar.
                // TODO: This also doesn't handle sentinels properly.
                switch (self) {
                    .scalar, .string => |str| {
                        if (arr.child == u8 and str.items.len == arr.len) {
                            var result: T = undefined;
                            @memcpy(&result, str.items);
                            return result;
                        } else return error.BadValue;
                    },
                    .list, .flow_list => |lst| {
                        // A fixed-size array must be filled exactly: reject
                        // lists that are too short or too long.
                        if (lst.items.len != arr.len) return error.BadValue;

                        // this may result in a big stack allocation, which is not ideal
                        var result: T = undefined;
                        for (lst.items, 0..) |item, idx| {
                            result[idx] = try item.convertTo(arr.child, allocator, options);
                        }
                        return result;
                    },
                    else => return error.BadValue,
                }
            },
            .Struct => |stt| {
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                if (stt.is_tuple) {
                    // tuples are deserialized positionally from a list
                    switch (self) {
                        .list, .flow_list => |list| {
                            if (list.items.len != stt.fields.len) return error.BadValue;
                            var result: T = undefined;
                            inline for (stt.fields, 0..) |field, idx| {
                                result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
                            }
                            return result;
                        },
                        else => return error.BadValue,
                    }
                }

                switch (self) {
                    .map, .flow_map => |map| {
                        var result: T = undefined;
                        // number of struct fields that were found in the map
                        var matched: usize = 0;

                        inline for (stt.fields) |field| {
                            if (map.get(field.name)) |value| {
                                @field(result, field.name) = try value.convertTo(field.type, allocator, options);
                                matched += 1;
                            } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
                                @field(result, field.name) = null;
                            } else return error.BadValue;
                        }

                        // Struct field names and map keys are both unique, so
                        // any key that did not match a field shows up as a
                        // difference between the two counts.
                        if (!options.ignore_extra_fields and matched != map.count())
                            return error.BadValue;

                        return result;
                    },
                    else => return error.BadValue,
                }
            },
            .Enum => {
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        // field names take priority over numeric values
                        if (std.meta.stringToEnum(T, str.items)) |value| return value;
                        if (options.allow_numeric_enums) {
                            const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
                                return error.BadValue;
                            return std.meta.intToEnum(T, parsed) catch error.BadValue;
                        }
                        return error.BadValue;
                    },
                    else => return error.BadValue,
                }
            },
            .Union => |unn| {
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));

                switch (self) {
                    .map, .flow_map => |map| {
                        // a union may not ever be deserialized from a map with more than one value
                        if (map.count() != 1) return error.BadValue;
                        const key = map.keys()[0];
                        const payload = map.values()[0];
                        inline for (unn.fields) |field| {
                            if (std.mem.eql(u8, key, field.name))
                                return @unionInit(T, field.name, try payload.convertTo(field.type, allocator, options));
                        }
                        return error.BadValue;
                    },
                    // TODO: if the field is a 0 width type like void, we could parse it
                    //       directly from a scalar/string value (i.e. a name with no
                    //       corresponding value)
                    else => return error.BadValue,
                }
            },
            .Optional => |opt| {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        for (options.null_strings) |check|
                            if (std.mem.eql(u8, str.items, check)) return null;
                    },
                    // lists and maps can never spell null; they are always
                    // converted as the child type
                    else => {},
                }
                return try self.convertTo(opt.child, allocator, options);
            },
            else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)),
        }
    }

    /// Create a scalar node holding a copy of `input`.
    pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .scalar, input);
    }

    /// Create a string node holding a copy of `input`.
    pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .string, input);
    }

    /// Shared implementation of `fromScalar` and `fromString`: allocate exactly
    /// `input.len` bytes from `alloc` and copy `input` into them.
    inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
        var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
        @field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
        return res;
    }

    /// Create an empty scalar node.
    pub inline fn newScalar(alloc: std.mem.Allocator) Value {
        return .{ .scalar = String.init(alloc) };
    }

    /// Create an empty string node.
    pub inline fn newString(alloc: std.mem.Allocator) Value {
        return .{ .string = String.init(alloc) };
    }

    /// Create an empty list node.
    pub inline fn newList(alloc: std.mem.Allocator) Value {
        return .{ .list = List.init(alloc) };
    }

    /// Create an empty flow-list node.
    pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
        return .{ .flow_list = List.init(alloc) };
    }

    /// Create an empty map node.
    pub inline fn newMap(alloc: std.mem.Allocator) Value {
        return .{ .map = Map.init(alloc) };
    }

    /// Create an empty flow-map node.
    pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
        return .{ .flow_map = Map.init(alloc) };
    }

    /// Deep structural equality. Two nodes are equal only if they have the same
    /// tag (a `list` never equals a `flow_list`, a `scalar` never equals a
    /// `string`), the same contents, and, for maps, the same key order.
    pub fn recursiveEqualsExact(self: Value, other: Value) bool {
        if (@as(TagType, self) != other) return false;
        switch (self) {
            inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items),
            inline .list, .flow_list => |lst, tag| {
                const olst = @field(other, @tagName(tag));

                if (lst.items.len != olst.items.len) return false;
                for (lst.items, olst.items) |this, that|
                    if (!this.recursiveEqualsExact(that)) return false;

                return true;
            },
            inline .map, .flow_map => |map, tag| {
                const omap = @field(other, @tagName(tag));

                if (map.count() != omap.count()) return false;
                var iter = map.iterator();
                var oiter = omap.iterator();
                // this loop structure enforces that the maps are in the same order
                while (iter.next()) |this| {
                    const that = oiter.next() orelse return false;
                    if (!std.mem.eql(u8, this.key_ptr.*, that.key_ptr.*) or !this.value_ptr.recursiveEqualsExact(that.value_ptr.*))
                        return false;
                }
                // the maps are equal if we have also consumed all of the values from
                // other.
                return oiter.next() == null;
            },
        }
    }

    /// Print this node and all of its children to stderr, followed by a newline.
    pub fn printDebug(self: Value) void {
        self.printRecursive(0);
        std.debug.print("\n", .{});
    }

    /// Print this node to stderr. `indent` is the number of spaces emitted
    /// before continuation lines and closing brackets of this node.
    fn printRecursive(self: Value, indent: usize) void {
        switch (self) {
            .scalar, .string => |str| {
                if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
                    // multi-line text starts on its own line, with every line indented
                    var lines = std.mem.splitScalar(u8, str.items, '\n');
                    std.debug.print("\n", .{});
                    while (lines.next()) |line| {
                        std.debug.print(
                            "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                            .{
                                .empty = "",
                                .indent = indent,
                                .line = line,
                                .nl = if (lines.peek() == null) "" else "\n",
                            },
                        );
                    }
                } else {
                    std.debug.print("{s}", .{str.items});
                }
            },
            .list, .flow_list => |list| {
                if (list.items.len == 0) {
                    std.debug.print("[]", .{});
                    return;
                }

                std.debug.print("[\n", .{});
                for (list.items, 0..) |value, idx| {
                    std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
                    value.printRecursive(indent + 2);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}]",
                    .{ .empty = "", .indent = indent },
                );
            },
            .map, .flow_map => |map| {
                if (map.count() == 0) {
                    std.debug.print("{{}}", .{});
                    return;
                }

                std.debug.print("{{\n", .{});

                var iter = map.iterator();

                while (iter.next()) |entry| {
                    std.debug.print(
                        "{[empty]s: >[indent]}{[key]s}: ",
                        .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* },
                    );
                    entry.value_ptr.printRecursive(indent + 4);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}}}",
                    .{ .empty = "", .indent = indent },
                );
            },
        }
    }
};