There are still some untested codepaths here, but this does seem to work for nontrivial objects, so, woohoo. It's worth noting that this is a recursive implementation (which seems silly after I hand-rolled the non-recursive main parser). The rationale is that an object nested deeply enough to exhaust the stack here probably shouldn't be converted directly into a single object anyway. I may revisit this, though I am still not certain how straightforward it would be to make this non-recursive, given all the odd comptime objects involved: essentially, the "parse stack" would have to be constructed at comptime.
405 lines
17 KiB
Zig
405 lines
17 KiB
Zig
const std = @import("std");
|
|
|
|
const Options = @import("../parser.zig").Options;
|
|
|
|
pub const Document = struct {
    /// Backing storage for everything reachable from `root`.
    arena: std.heap.ArenaAllocator,
    /// The parsed document tree. Undefined until the parser assigns it.
    root: Value,

    /// Create an empty document whose contents will be allocated from an
    /// arena wrapping `backing`. `root` is deliberately left undefined and
    /// must be populated (by the parser) before any other method is called.
    pub fn init(backing: std.mem.Allocator) Document {
        return Document{
            .root = undefined,
            .arena = std.heap.ArenaAllocator.init(backing),
        };
    }

    /// Convert the document tree into a concrete `T`, allocating out of this
    /// document's arena. Ownership of the arena moves into the returned
    /// `Parsed(T)` — call `deinit` on the result OR on this document, not both.
    pub fn convertTo(self: *Document, comptime T: type, options: Options) !Parsed(T) {
        const converted = try self.root.convertTo(T, self.arena.allocator(), options);
        return Parsed(T){
            .arena = self.arena,
            .value = converted,
        };
    }

    /// Dump the document tree to stderr for debugging.
    pub fn printDebug(self: Document) void {
        self.root.printDebug();
    }

    /// Release every allocation owned by this document.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};
|
|
|
|
/// The result type of `Document.convertTo`: the converted `value` paired
/// with the arena that owns every allocation reachable from it. Call
/// `deinit` exactly once when finished with `value`; after that, any
/// slices or pointers inside `value` are dangling.
pub fn Parsed(comptime T: type) type {
    return struct {
        /// The converted result. May reference arena-owned memory.
        value: T,
        /// Owns all memory backing `value` (moved here from the Document).
        arena: std.heap.ArenaAllocator,

        /// Free all memory backing `value` in one shot.
        pub fn deinit(self: @This()) void {
            self.arena.deinit();
        }
    };
}
|
|
|
|
/// Dynamically-typed document node. Scalars/strings carry their bytes,
/// lists and maps carry child `Value`s. The `flow_` variants record that
/// the node came from flow (inline) syntax but are otherwise equivalent.
pub const Value = union(enum) {
    pub const String = std.ArrayList(u8);
    pub const Map = std.StringArrayHashMap(Value);
    pub const List = std.ArrayList(Value);
    pub const TagType = @typeInfo(Value).Union.tag_type.?;

    scalar: String,
    string: String,
    list: List,
    flow_list: List,
    map: Map,
    flow_map: Map,

    /// Recursively convert this value into a concrete `T`.
    ///
    /// `allocator` supplies memory for any slices/pointers in the result;
    /// callers normally pass the document arena so the result shares its
    /// lifetime. Returns `error.BadValue` whenever the document shape or
    /// contents cannot represent `T` under the given `options`.
    ///
    /// NOTE: this is recursive, so pathologically deep documents can
    /// exhaust the stack.
    pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
        switch (@typeInfo(T)) {
            .Void => {
                switch (self) {
                    // only the empty scalar (or, with coercion, empty string) maps to void
                    .scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
                    .string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
                    else => return error.BadValue,
                }
            },
            .Bool => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        for (options.boolean_strings.truthy) |check|
                            if (std.mem.eql(u8, str.items, check)) return true;
                        for (options.boolean_strings.falsy) |check|
                            if (std.mem.eql(u8, str.items, check)) return false;

                        return error.BadValue;
                    },
                    else => return error.BadValue,
                }
            },
            .Int, .ComptimeInt => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        // base 0 accepts 0x/0o/0b prefixes as well as decimal
                        return try std.fmt.parseInt(T, str.items, 0);
                    },
                    else => return error.BadValue,
                }
            },
            .Float, .ComptimeFloat => {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        // parseFloat, unlike parseInt, takes no base argument
                        return try std.fmt.parseFloat(T, str.items);
                    },
                    else => return error.BadValue,
                }
            },
            .Pointer => |ptr| switch (ptr.size) {
                .Slice => {
                    // TODO: There is ambiguity here because a document expecting a list
                    //       of u8 could parse a string instead. Introduce a special
                    //       type to use for this? the problem is that it becomes
                    //       invasive into downstream code. Ultimately this should
                    //       probably be solved in the zig stdlib or similar.
                    // TODO: This also doesn't handle sentinels properly.
                    switch (self) {
                        // []u8 slices alias the value's own buffer (no copy)
                        .scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
                        .list, .flow_list => |lst| {
                            var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
                            errdefer result.deinit();
                            for (lst.items) |item| {
                                result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
                            }
                            return result.toOwnedSlice();
                        },
                        else => return error.BadValue,
                    }
                },
                .One => {
                    const result = try allocator.create(ptr.child);
                    errdefer allocator.destroy(result);
                    result.* = try self.convertTo(ptr.child, allocator, options);
                    return result;
                },
                else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)), // do not support many or C item pointers.
            },
            .Array => |arr| {
                // TODO: There is ambiguity here because a document expecting a list
                //       of u8 could parse a string instead. Introduce a special
                //       type to use for this? the problem is that it becomes
                //       invasive into downstream code. Ultimately this should
                //       probably be solved in the zig stdlib or similar.
                // TODO: This also doesn't handle sentinels properly.
                switch (self) {
                    .scalar, .string => |str| {
                        // byte arrays require an exact length match
                        if (arr.child == u8 and str.items.len == arr.len) {
                            var result: T = undefined;
                            @memcpy(&result, str.items);
                            return result;
                        } else return error.BadValue;
                    },
                    .list, .flow_list => |lst| {
                        var storage = try std.ArrayList(arr.child).initCapacity(allocator, arr.len);
                        defer storage.deinit();
                        for (lst.items) |item| {
                            storage.appendAssumeCapacity(try item.convertTo(arr.child, allocator, options));
                        }
                        // this may result in a big stack allocation, which is not ideal
                        var result: T = undefined;
                        @memcpy(&result, storage.items);
                        return result;
                    },
                    else => return error.BadValue,
                }
            },
            .Struct => |stt| {
                // a type may override conversion entirely
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                if (stt.is_tuple) {
                    switch (self) {
                        .list, .flow_list => |list| {
                            if (list.items.len != stt.fields.len) return error.BadValue;
                            var result: T = undefined;
                            inline for (stt.fields, 0..) |field, idx| {
                                result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
                            }
                            return result;
                        },
                        else => return error.BadValue,
                    }
                }

                switch (self) {
                    .map, .flow_map => |map| {
                        var result: T = undefined;

                        if (options.ignore_extra_fields) {
                            inline for (stt.fields) |field| {
                                if (map.get(field.name)) |value| {
                                    @field(result, field.name) = try value.convertTo(field.type, allocator, options);
                                } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
                                    @field(result, field.name) = null;
                                } else {
                                    return error.BadValue;
                                }
                            }
                        } else {
                            // we could iterate over each map key and do an exhaustive
                            // comparison with each struct field name. This would save
                            // memory and it would probably be a fair amount faster for
                            // small structs.
                            var clone = try map.clone();
                            defer clone.deinit();
                            inline for (stt.fields) |field| {
                                if (clone.fetchSwapRemove(field.name)) |kv| {
                                    @field(result, field.name) = try kv.value.convertTo(field.type, allocator, options);
                                } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
                                    @field(result, field.name) = null;
                                } else return error.BadValue;
                            }
                            // there were extra fields in the data
                            if (clone.count() > 0) return error.BadValue;
                        }

                        return result;
                    },
                    else => return error.BadValue,
                }
            },
            .Enum => {
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        if (std.meta.stringToEnum(T, str.items)) |value| return value;
                        if (options.allow_numeric_enums) {
                            const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
                                return error.BadValue;
                            return std.meta.intToEnum(T, parsed) catch error.BadValue;
                        }
                        return error.BadValue;
                    },
                    else => return error.BadValue,
                }
            },
            .Union => |unn| {
                if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                    return T.deserializeNice(self, allocator, options);

                if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));

                switch (self) {
                    .map, .flow_map => |map| {
                        // a union may not ever be deserialized from a map with more than one value
                        if (map.count() != 1) return error.BadValue;
                        const key = map.keys()[0];
                        inline for (unn.fields) |field| {
                            if (std.mem.eql(u8, key, field.name))
                                return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
                        }
                        return error.BadValue;
                    },
                    // TODO: if the field is a 0 width type like void, we could parse it
                    //       directly from a scalar/string value (i.e. a name with no
                    //       corresponding value)
                    else => return error.BadValue,
                }
            },
            .Optional => |opt| {
                switch (self) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        for (options.null_strings) |check|
                            if (std.mem.eql(u8, str.items, check)) return null;

                        return try self.convertTo(opt.child, allocator, options);
                    },
                    // a present list/map can never be "null": convert it directly
                    // to the child type (previously this rejected e.g. a present
                    // map for an optional-struct field).
                    else => return try self.convertTo(opt.child, allocator, options),
                }
            },
            else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)),
        }
    }

    /// Heap-copy `input` into a new scalar-flavored value.
    pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .scalar, input);
    }

    /// Heap-copy `input` into a new string-flavored value.
    pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .string, input);
    }

    /// Shared implementation for `fromScalar`/`fromString`: allocate a buffer
    /// of exactly `input.len` and copy the bytes in.
    inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
        var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
        @field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
        return res;
    }

    /// New empty scalar value.
    pub inline fn newScalar(alloc: std.mem.Allocator) Value {
        return .{ .scalar = String.init(alloc) };
    }

    /// New empty string value.
    pub inline fn newString(alloc: std.mem.Allocator) Value {
        return .{ .string = String.init(alloc) };
    }

    /// New empty (block-style) list value.
    pub inline fn newList(alloc: std.mem.Allocator) Value {
        return .{ .list = List.init(alloc) };
    }

    /// New empty flow-style list value.
    pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
        return .{ .flow_list = List.init(alloc) };
    }

    /// New empty (block-style) map value.
    pub inline fn newMap(alloc: std.mem.Allocator) Value {
        return .{ .map = Map.init(alloc) };
    }

    /// New empty flow-style map value.
    pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
        return .{ .flow_map = Map.init(alloc) };
    }

    /// Deep structural equality that also requires identical tags (scalar
    /// vs string, list vs flow_list, ...) and identical map ordering.
    pub fn recursiveEqualsExact(self: Value, other: Value) bool {
        if (@as(TagType, self) != other) return false;
        switch (self) {
            inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items),
            inline .list, .flow_list => |lst, tag| {
                const olst = @field(other, @tagName(tag));

                if (lst.items.len != olst.items.len) return false;
                for (lst.items, olst.items) |this, that| if (!this.recursiveEqualsExact(that)) return false;
                return true;
            },
            inline .map, .flow_map => |map, tag| {
                const omap = @field(other, @tagName(tag));

                if (map.count() != omap.count()) return false;
                var iter = map.iterator();
                var oiter = omap.iterator();
                // this loop structure enforces that the maps are in the same order
                while (iter.next()) |this| {
                    const that = oiter.next() orelse return false;
                    if (!std.mem.eql(u8, this.key_ptr.*, that.key_ptr.*) or !this.value_ptr.recursiveEqualsExact(that.value_ptr.*)) return false;
                }
                // the maps are equal if we have also consumed all of the values from
                // other.
                return oiter.next() == null;
            },
        }
    }

    /// Pretty-print the tree to stderr, followed by a newline.
    pub fn printDebug(self: Value) void {
        self.printRecursive(0);
        std.debug.print("\n", .{});
    }

    /// Recursive worker for `printDebug`; `indent` is the current column.
    fn printRecursive(self: Value, indent: usize) void {
        switch (self) {
            .scalar, .string => |str| {
                if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
                    var lines = std.mem.splitScalar(u8, str.items, '\n');
                    std.debug.print("\n", .{});
                    while (lines.next()) |line| {
                        std.debug.print(
                            "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                            .{
                                .empty = "",
                                .indent = indent,
                                .line = line,
                                .nl = if (lines.peek() == null) "" else "\n",
                            },
                        );
                    }
                } else {
                    std.debug.print("{s}", .{str.items});
                }
            },
            .list, .flow_list => |list| {
                if (list.items.len == 0) {
                    std.debug.print("[]", .{});
                    return;
                }

                std.debug.print("[\n", .{});
                for (list.items, 0..) |value, idx| {
                    std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
                    value.printRecursive(indent + 2);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}]",
                    .{ .empty = "", .indent = indent },
                );
            },
            .map, .flow_map => |map| {
                if (map.count() == 0) {
                    std.debug.print("{{}}", .{});
                    return;
                }

                std.debug.print("{{\n", .{});

                var iter = map.iterator();

                while (iter.next()) |entry| {
                    std.debug.print(
                        "{[empty]s: >[indent]}{[key]s}: ",
                        .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* },
                    );
                    entry.value_ptr.printRecursive(indent + 4);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}}}",
                    .{ .empty = "", .indent = indent },
                );
            },
        }
    }
};
|