// Copyright 2023 torque@epicyclic.dev // // Licensed under the MIT/Expat license. You may not use this file except in // compliance with the license. You may obtain a copy of the license at // // https://spdx.org/licenses/MIT.html // // This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. const std = @import("std"); const hasFn = if (@hasDecl(std.meta, "trait")) struct { fn hasFn(comptime T: type, comptime name: []const u8) bool { return std.meta.trait.hasFn(name)(T); } }.hasFn else std.meta.hasFn; const Options = @import("../parser.zig").Options; pub const Document = struct { arena: std.heap.ArenaAllocator, root: Value, pub fn init(alloc: std.mem.Allocator) Document { return .{ .arena = std.heap.ArenaAllocator.init(alloc), .root = undefined, }; } pub fn convertTo(self: *Document, comptime T: type, options: Options) !Parsed(T) { return .{ .value = try self.root.convertTo(T, self.arena.allocator(), options), .arena = self.arena, }; } pub fn printDebug(self: Document) void { return self.root.printDebug(); } pub fn deinit(self: Document) void { self.arena.deinit(); } }; pub fn Parsed(comptime T: type) type { return struct { value: T, arena: std.heap.ArenaAllocator, pub fn deinit(self: @This()) void { self.arena.deinit(); } }; } pub const Value = union(enum) { pub const String = [:0]const u8; pub const Map = std.StringArrayHashMap(Value); pub const List = std.ArrayList(Value); pub const TagType = @typeInfo(Value).Union.tag_type.?; scalar: String, string: String, list: List, inline_list: List, map: Map, inline_map: Map, pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T { switch (@typeInfo(T)) { .Void => { switch (self) { .scalar => |str| return if (str.len == 0) void{} else error.BadValue, .string => |str| return if (options.coerce_strings and str.len == 0) void{} else error.BadValue, else => return error.BadValue, } }, .Bool => { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; if (options.case_insensitive_scalar_coersion) { for (options.boolean_scalars.truthy) |check| if (std.ascii.eqlIgnoreCase(str, check)) return true; for (options.boolean_scalars.falsy) |check| if (std.ascii.eqlIgnoreCase(str, check)) return false; } else { for (options.boolean_scalars.truthy) |check| if (std.mem.eql(u8, str, check)) return true; for (options.boolean_scalars.falsy) |check| if (std.mem.eql(u8, str, check)) return false; } return error.BadValue; }, else => return error.BadValue, } }, .Int, .ComptimeInt => { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; return try std.fmt.parseInt(T, str, 0); }, else => return error.BadValue, } }, .Float, .ComptimeFloat => { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; return try std.fmt.parseFloat(T, str); }, else => return error.BadValue, } }, .Pointer => |ptr| switch (ptr.size) { .Slice => { // TODO: There is ambiguity here because a document expecting a list // of u8 could parse a string instead. Introduce a special // type to use for this? the problem is that it becomes // invasive into downstream code. Ultimately this should // probably be solved in the zig stdlib or similar. switch (self) { .scalar, .string => |str| { if (comptime ptr.child == u8) { if (comptime ptr.sentinel) |sentinel| if (comptime @as(*align(1) const ptr.child, @ptrCast(sentinel)).* != 0) return error.BadValue; return str; } else { return error.BadValue; } }, .list, .inline_list => |lst| { const result = try allocator.alloc(ptr.child, lst.items.len + @intFromBool(ptr.sentinel != null)); for (result[0..lst.items.len], lst.items) |*res, item| { res.* = try item.convertTo(ptr.child, allocator, options); } if (comptime ptr.sentinel) |sentinel| { const sval = @as(*align(1) const ptr.child, @ptrCast(sentinel)).*; result[lst.items.len] = sval; return result[0..lst.items.len :sval]; } else { return result; } }, else => return error.BadValue, } }, .One => { const result = try allocator.create(ptr.child); errdefer allocator.destroy(result); result.* = try self.convertTo(ptr.child, allocator, options); return result; }, else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)), }, .Array => |arr| { // TODO: There is ambiguity here because a document expecting a list // of u8 could parse a string instead. Introduce a special // type to use for this? the problem is that it becomes // invasive into downstream code. Ultimately this should // probably be solved in the zig stdlib or similar. switch (self) { .scalar, .string => |str| { if (arr.child == u8 and str.len == arr.len) { var result: T = undefined; @memcpy(&result, str); return result; } else return error.BadValue; }, .list, .inline_list => |lst| { if (lst.items.len != arr.len) return error.BadValue; var result: T = undefined; for (&result, lst.items) |*res, item| { res.* = try item.convertTo(arr.child, allocator, options); } return result; }, else => return error.BadValue, } }, .Struct => |stt| { if (comptime hasFn(T, "deserializeNice")) return T.deserializeNice(self, allocator, options); if (stt.is_tuple) { switch (self) { .list, .inline_list => |list| { if (list.items.len != stt.fields.len) return error.BadValue; var result: T = undefined; inline for (stt.fields, &result, list.items) |field, *res, item| { res.* = try item.convertTo(field.type, allocator, options); } return result; }, else => return error.BadValue, } } switch (self) { .map, .inline_map => |map| { var result: T = undefined; if (options.ignore_extra_fields) { inline for (stt.fields) |field| { if (map.get(field.name)) |value| { @field(result, field.name) = try value.convertTo(field.type, allocator, options); } else if (options.allow_omitting_default_values) { if (comptime field.default_value) |def| @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).* else return error.BadValue; } else { return error.BadValue; } } } else { // TODO: consider not cloning the map here. This would // result in the requirement that the raw value object // not be used after it has been converted to a type, // based on the parse options. var clone = try map.clone(); defer clone.deinit(); inline for (stt.fields) |field| { if (clone.fetchSwapRemove(field.name)) |kv| { @field(result, field.name) = try kv.value.convertTo(field.type, allocator, options); } else if (options.allow_omitting_default_values) { if (comptime field.default_value) |def| @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).* else return error.BadValue; } else return error.BadValue; } // there were extra fields in the data if (clone.count() > 0) return error.BadValue; } return result; }, else => return error.BadValue, } }, .Enum => { if (comptime hasFn(T, "deserializeNice")) return T.deserializeNice(self, allocator, options); switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; const name = if (options.expect_enum_dot) blk: { if (str.len > 0 and str[0] == '.') break :blk str[1..] else return error.BadValue; } else str; if (std.meta.stringToEnum(T, name)) |value| return value; if (options.allow_numeric_enums) { const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str, 10) catch return error.BadValue; return std.meta.intToEnum(T, parsed) catch error.BadValue; } return error.BadValue; }, else => return error.BadValue, } }, .Union => |unn| { if (comptime hasFn(T, "deserializeNice")) return T.deserializeNice(self, allocator, options); if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T)); switch (self) { .map, .inline_map => |map| { // a union may not ever be deserialized from a map with more // (or less) than one value if (map.count() != 1) return error.BadValue; const key = map.keys()[0]; const name = if (options.expect_enum_dot) blk: { if (key.len > 0 and key[0] == '.') break :blk key[1..] else return error.BadValue; } else key; inline for (unn.fields) |field| { if (std.mem.eql(u8, name, field.name)) return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options)); } return error.BadValue; }, inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; const name = if (options.expect_enum_dot) blk: { if (str.len > 0 and str[0] == '.') break :blk str[1..] else return error.BadValue; } else str; inline for (unn.fields) |field| { if (@sizeOf(field.type) != 0) continue; // this logic may be a little off: comtime_int, // comptime_float, and type will all have size 0 because // they can't be used at runtime. On the other hand, trying // to use them here should result in a compile error? Also, // it's a 0 sized type so initializing it as undefined // shouldn't be a problem. As far as I know. if (std.mem.eql(u8, name, field.name)) return @unionInit(T, field.name, undefined); } return error.BadValue; }, else => return error.BadValue, } }, .Optional => |opt| { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; if (options.case_insensitive_scalar_coersion) { for (options.null_scalars) |check| if (std.ascii.eqlIgnoreCase(str, check)) return null; } else { for (options.null_scalars) |check| if (std.mem.eql(u8, str, check)) return null; } return try self.convertTo(opt.child, allocator, options); }, else => return error.BadValue, } }, else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)), } } pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value { return try _fromScalarOrString(alloc, .scalar, input); } pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value { return try _fromScalarOrString(alloc, .string, input); } inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value { return @unionInit(Value, @tagName(classification), try alloc.dupeZ(u8, input)); } pub inline fn emptyScalar() Value { return .{ .scalar = "" }; } pub inline fn emptyString() Value { return .{ .string = "" }; } pub inline fn newList(alloc: std.mem.Allocator) Value { return .{ .list = List.init(alloc) }; } pub inline fn newFlowList(alloc: std.mem.Allocator) Value { return .{ .inline_list = List.init(alloc) }; } pub inline fn newMap(alloc: std.mem.Allocator) Value { return .{ .map = Map.init(alloc) }; } pub inline fn newFlowMap(alloc: std.mem.Allocator) Value { return .{ .inline_map = Map.init(alloc) }; } pub fn recursiveEqualsExact(self: Value, other: Value) bool { if (@as(TagType, self) != other) return false; switch (self) { inline .scalar, .string => |str, tag| return std.mem.eql(u8, str, @field(other, @tagName(tag))), inline .list, .inline_list => |lst, tag| { const olst = @field(other, @tagName(tag)); if (lst.items.len != olst.items.len) return false; for (lst.items, olst.items) |this, that| if (!this.recursiveEqualsExact(that)) return false; return true; }, inline .map, .inline_map => |map, tag| { const omap = @field(other, @tagName(tag)); if (map.count() != omap.count()) return false; var iter = map.iterator(); var oiter = omap.iterator(); // this loop structure enforces that the maps are in the same order while (iter.next()) |this| { const that = oiter.next() orelse return false; if (!std.mem.eql(u8, this.key_ptr.*, that.key_ptr.*) or !this.value_ptr.recursiveEqualsExact(that.value_ptr.*)) return false; } // the maps are equal if we have also consumed all of the values from // other. return oiter.next() == null; }, } } pub fn printDebug(self: Value) void { self.printRecursive(0); std.debug.print("\n", .{}); } fn printRecursive(self: Value, indent: usize) void { switch (self) { .scalar, .string => |str| { if (std.mem.indexOfScalar(u8, str, '\n')) |_| { var lines = std.mem.splitScalar(u8, str, '\n'); std.debug.print("\n", .{}); while (lines.next()) |line| { std.debug.print( "{[empty]s: >[indent]}{[line]s}{[nl]s}", .{ .empty = "", .indent = indent, .line = line, .nl = if (lines.peek() == null) "" else "\n", }, ); } } else { std.debug.print("{s}", .{str}); } }, .list, .inline_list => |list| { if (list.items.len == 0) { std.debug.print("[]", .{}); return; } std.debug.print("[\n", .{}); for (list.items, 0..) |value, idx| { std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx }); value.printRecursive(indent + 2); std.debug.print(",\n", .{}); } std.debug.print( "{[empty]s: >[indent]}]", .{ .empty = "", .indent = indent }, ); }, .map, .inline_map => |map| { if (map.count() == 0) { std.debug.print("{{}}", .{}); return; } std.debug.print("{{\n", .{}); var iter = map.iterator(); while (iter.next()) |entry| { std.debug.print( "{[empty]s: >[indent]}{[key]s}: ", .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* }, ); entry.value_ptr.printRecursive(indent + 4); std.debug.print(",\n", .{}); } std.debug.print( "{[empty]s: >[indent]}}}", .{ .empty = "", .indent = indent }, ); }, } } };