481 lines
21 KiB
Zig
481 lines
21 KiB
Zig
// Copyright 2023 torque@epicyclic.dev
//
// Licensed under the MIT/Expat license. You may not use this file except in
// compliance with the license. You may obtain a copy of the license at
//
// https://spdx.org/licenses/MIT.html
//
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
const std = @import("std");
|
|
/// Reports at compile time whether a type declares a function with the given
/// name; called as `hasFn(SomeType, "functionName")`.
///
/// Compatibility shim: when `std.meta` still has a `trait` namespace, the
/// curried `std.meta.trait.hasFn(name)(T)` is wrapped so that it takes the
/// same `(type, name)` arguments as `std.meta.hasFn`, which is used directly
/// otherwise.
const hasFn = if (@hasDecl(std.meta, "trait"))
    struct {
        fn check(comptime Container: type, comptime decl_name: []const u8) bool {
            const predicate = std.meta.trait.hasFn(decl_name);
            return predicate(Container);
        }
    }.check
else
    std.meta.hasFn;
|
|
|
|
const Options = @import("../parser.zig").Options;
|
|
|
|
/// A document tree: the root `Value` together with the arena whose allocator
/// is handed to `Value.convertTo` and which `deinit` releases.
pub const Document = struct {
    const Self = @This();

    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create a document whose arena is backed by `alloc`.
    ///
    /// `root` is left undefined; it must be assigned before it is read.
    pub fn init(alloc: std.mem.Allocator) Self {
        const backing_arena = std.heap.ArenaAllocator.init(alloc);
        return Self{
            .arena = backing_arena,
            .root = undefined,
        };
    }

    /// Convert the root value to `T`, allocating from this document's arena.
    ///
    /// The returned `Parsed(T)` carries a by-value copy of the arena taken
    /// after the conversion has run, so the document and the result describe
    /// the same arena.
    /// NOTE(review): this looks like an ownership transfer - calling `deinit`
    /// on both the document and the result would presumably release the same
    /// arena twice; confirm against callers.
    pub fn convertTo(self: *Self, comptime T: type, options: Options) !Parsed(T) {
        const converted = try self.root.convertTo(T, self.arena.allocator(), options);
        return Parsed(T){
            .value = converted,
            .arena = self.arena,
        };
    }

    /// Dump the root value with `Value.printDebug`.
    pub fn printDebug(self: Self) void {
        self.root.printDebug();
    }

    /// Release the arena.
    pub fn deinit(self: Self) void {
        self.arena.deinit();
    }
};
|
|
|
|
/// Returns the result type of a typed conversion: a `T` paired with the arena
/// that `deinit` releases.
pub fn Parsed(comptime T: type) type {
    return struct {
        const Self = @This();

        /// The converted value.
        value: T,
        /// Arena released by `deinit`.
        arena: std.heap.ArenaAllocator,

        /// Release the arena.
        pub fn deinit(self: Self) void {
            self.arena.deinit();
        }
    };
}
|
|
|
|
pub const Value = union(enum) {
|
|
pub const String = [:0]const u8;
|
|
pub const Map = std.StringArrayHashMap(Value);
|
|
pub const List = std.ArrayList(Value);
|
|
pub const TagType = @typeInfo(Value).Union.tag_type.?;
|
|
|
|
scalar: String,
|
|
string: String,
|
|
list: List,
|
|
inline_list: List,
|
|
map: Map,
|
|
inline_map: Map,
|
|
|
|
/// Convert this value into an instance of `T`, recursing through `T`.
///
/// The conversion is chosen at compile time from `@typeInfo(T)`:
/// - void: an empty scalar, or an empty string when `options.coerce_strings`.
/// - bool: text equal to one of `options.boolean_scalars.truthy` / `.falsy`.
/// - integers / floats: `std.fmt.parseInt` (base argument 0) and
///   `std.fmt.parseFloat`.
/// - slices: text for `u8` slices (returned as-is, not copied); otherwise a
///   list whose items are converted into a newly allocated slice.
/// - single-item pointers: the pointee is allocated and converted.
/// - arrays: text of exactly the array length for `u8` arrays; otherwise a
///   list of exactly the array length.
/// - structs, enums, unions: `T.deserializeNice(self, allocator, options)`
///   when `T` declares it; otherwise tuples come from lists, structs from
///   maps, enums from text, and tagged unions from a single-entry map or
///   (for zero-size payloads) from text.
/// - optionals: text equal to one of `options.null_scalars` yields `null`;
///   every other value is converted as the child type.
///
/// Text tagged `.string` is only accepted for non-text targets when
/// `options.coerce_strings` is set.
///
/// Returns `error.BadValue` when the value does not fit `T`, and propagates
/// errors from the number parsers, from `allocator`, and from
/// `deserializeNice`. Any other `T` is a compile error.
pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
    switch (@typeInfo(T)) {
        .Void => {
            switch (self) {
                .scalar => |str| return if (str.len == 0) void{} else error.BadValue,
                .string => |str| return if (options.coerce_strings and str.len == 0) void{} else error.BadValue,
                else => return error.BadValue,
            }
        },
        .Bool => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    if (options.case_insensitive_scalar_coersion) {
                        for (options.boolean_scalars.truthy) |check|
                            if (std.ascii.eqlIgnoreCase(str, check)) return true;
                        for (options.boolean_scalars.falsy) |check|
                            if (std.ascii.eqlIgnoreCase(str, check)) return false;
                    } else {
                        for (options.boolean_scalars.truthy) |check|
                            if (std.mem.eql(u8, str, check)) return true;
                        for (options.boolean_scalars.falsy) |check|
                            if (std.mem.eql(u8, str, check)) return false;
                    }

                    return error.BadValue;
                },
                else => return error.BadValue,
            }
        },
        .Int, .ComptimeInt => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    return try std.fmt.parseInt(T, str, 0);
                },
                else => return error.BadValue,
            }
        },
        .Float, .ComptimeFloat => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    return try std.fmt.parseFloat(T, str);
                },
                else => return error.BadValue,
            }
        },
        .Pointer => |ptr| switch (ptr.size) {
            .Slice => {
                // TODO: There is ambiguity here because a document expecting a list
                // of u8 could parse a string instead. Introduce a special
                // type to use for this? The problem is that it becomes
                // invasive into downstream code. Ultimately this should
                // probably be solved in the zig stdlib or similar.
                switch (self) {
                    .scalar, .string => |str| {
                        if (comptime ptr.child == u8) {
                            // The stored text is 0-terminated, so only a 0
                            // sentinel (or no sentinel) can be satisfied.
                            if (comptime ptr.sentinel) |sentinel|
                                if (comptime @as(*align(1) const ptr.child, @ptrCast(sentinel)).* != 0)
                                    return error.BadValue;

                            // Returned without copying: the result aliases
                            // this value's text.
                            return str;
                        } else {
                            return error.BadValue;
                        }
                    },
                    .list, .inline_list => |lst| {
                        // One extra element is reserved for the sentinel, if any.
                        const result = try allocator.alloc(ptr.child, lst.items.len + @intFromBool(ptr.sentinel != null));
                        // Item conversion below can fail; do not leave the
                        // slice behind in that case.
                        errdefer allocator.free(result);

                        for (result[0..lst.items.len], lst.items) |*res, item| {
                            res.* = try item.convertTo(ptr.child, allocator, options);
                        }

                        if (comptime ptr.sentinel) |sentinel| {
                            const sval = @as(*align(1) const ptr.child, @ptrCast(sentinel)).*;
                            result[lst.items.len] = sval;
                            return result[0..lst.items.len :sval];
                        } else {
                            return result;
                        }
                    },
                    else => return error.BadValue,
                }
            },
            .One => {
                const result = try allocator.create(ptr.child);
                errdefer allocator.destroy(result);
                result.* = try self.convertTo(ptr.child, allocator, options);
                return result;
            },
            else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)),
        },
        .Array => |arr| {
            // TODO: There is ambiguity here because a document expecting a list
            // of u8 could parse a string instead. Introduce a special
            // type to use for this? The problem is that it becomes
            // invasive into downstream code. Ultimately this should
            // probably be solved in the zig stdlib or similar.
            switch (self) {
                .scalar, .string => |str| {
                    // Text only fills a byte array of exactly its own length.
                    if (arr.child == u8 and str.len == arr.len) {
                        var result: T = undefined;
                        @memcpy(&result, str);
                        return result;
                    } else return error.BadValue;
                },
                .list, .inline_list => |lst| {
                    if (lst.items.len != arr.len) return error.BadValue;

                    var result: T = undefined;
                    for (&result, lst.items) |*res, item| {
                        res.* = try item.convertTo(arr.child, allocator, options);
                    }
                    return result;
                },
                else => return error.BadValue,
            }
        },
        .Struct => |stt| {
            if (comptime hasFn(T, "deserializeNice"))
                return T.deserializeNice(self, allocator, options);

            // Tuples are positional: a list with one item per tuple field.
            if (stt.is_tuple) {
                switch (self) {
                    .list, .inline_list => |list| {
                        if (list.items.len != stt.fields.len) return error.BadValue;
                        var result: T = undefined;
                        inline for (stt.fields, &result, list.items) |field, *res, item| {
                            res.* = try item.convertTo(field.type, allocator, options);
                        }
                        return result;
                    },
                    else => return error.BadValue,
                }
            }

            switch (self) {
                .map, .inline_map => |map| {
                    var result: T = undefined;
                    // Number of map keys that named a struct field. Field
                    // names and map keys are each unique, so the map holds
                    // extra keys exactly when this ends up below map.count().
                    var matched: usize = 0;

                    inline for (stt.fields) |field| {
                        if (map.get(field.name)) |value| {
                            @field(result, field.name) = try value.convertTo(field.type, allocator, options);
                            matched += 1;
                        } else if (options.allow_omitting_default_values) {
                            // A missing key is only acceptable when the field
                            // declares a default to fall back on.
                            if (comptime field.default_value) |def|
                                @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
                            else
                                return error.BadValue;
                        } else {
                            return error.BadValue;
                        }
                    }

                    // there were extra fields in the data
                    if (!options.ignore_extra_fields and matched != map.count()) return error.BadValue;

                    return result;
                },
                else => return error.BadValue,
            }
        },
        .Enum => {
            if (comptime hasFn(T, "deserializeNice"))
                return T.deserializeNice(self, allocator, options);

            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    // With expect_enum_dot the text must be written `.name`;
                    // the dot is stripped before lookup.
                    const name = if (options.expect_enum_dot) blk: {
                        if (str.len > 0 and str[0] == '.')
                            break :blk str[1..]
                        else
                            return error.BadValue;
                    } else str;

                    if (std.meta.stringToEnum(T, name)) |value| return value;
                    if (options.allow_numeric_enums) {
                        // NOTE(review): this parses `str`, not `name`. When
                        // expect_enum_dot is set, `str` always starts with
                        // '.' here, so the numeric form can never parse.
                        // Confirm whether numbers are meant to be written
                        // with or without the dot before changing this.
                        const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str, 10) catch
                            return error.BadValue;
                        return std.meta.intToEnum(T, parsed) catch error.BadValue;
                    }
                    return error.BadValue;
                },
                else => return error.BadValue,
            }
        },
        .Union => |unn| {
            if (comptime hasFn(T, "deserializeNice"))
                return T.deserializeNice(self, allocator, options);

            if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));

            switch (self) {
                .map, .inline_map => |map| {
                    // a union may not ever be deserialized from a map with more
                    // (or less) than one value
                    if (map.count() != 1) return error.BadValue;
                    const key = map.keys()[0];
                    const name = if (options.expect_enum_dot) blk: {
                        if (key.len > 0 and key[0] == '.')
                            break :blk key[1..]
                        else
                            return error.BadValue;
                    } else key;

                    // The single key selects the union field; its value
                    // becomes that field's payload.
                    inline for (unn.fields) |field| {
                        if (std.mem.eql(u8, name, field.name))
                            return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
                    }
                    return error.BadValue;
                },
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    const name = if (options.expect_enum_dot) blk: {
                        if (str.len > 0 and str[0] == '.')
                            break :blk str[1..]
                        else
                            return error.BadValue;
                    } else str;

                    // Bare text can only select a field that carries no data.
                    inline for (unn.fields) |field| {
                        if (@sizeOf(field.type) != 0) continue;
                        // this logic may be a little off: comptime_int,
                        // comptime_float, and type will all have size 0 because
                        // they can't be used at runtime. On the other hand, trying
                        // to use them here should result in a compile error? Also,
                        // it's a 0 sized type so initializing it as undefined
                        // shouldn't be a problem. As far as I know.
                        if (std.mem.eql(u8, name, field.name))
                            return @unionInit(T, field.name, undefined);
                    }
                    return error.BadValue;
                },
                else => return error.BadValue,
            }
        },
        .Optional => |opt| {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    // Only text that may act as a scalar can spell null. A
                    // string that is not being coerced skips the null check
                    // and is handed to the child conversion, which applies
                    // its own string rules (so `?[]const u8` still accepts it).
                    if (tag == .scalar or options.coerce_strings) {
                        if (options.case_insensitive_scalar_coersion) {
                            for (options.null_scalars) |check|
                                if (std.ascii.eqlIgnoreCase(str, check)) return null;
                        } else {
                            for (options.null_scalars) |check|
                                if (std.mem.eql(u8, str, check)) return null;
                        }
                    }

                    return try self.convertTo(opt.child, allocator, options);
                },
                // Lists and maps are never null; convert them as the child
                // type so optional slices, arrays, structs and unions work.
                else => return try self.convertTo(opt.child, allocator, options),
            }
        },
        else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)),
    }
}
|
|
|
|
/// Build a `.scalar` value holding a 0-terminated copy of `input` allocated
/// from `alloc`.
pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
    const owned = try alloc.dupeZ(u8, input);
    return Value{ .scalar = owned };
}
|
|
|
|
/// Build a `.string` value holding a 0-terminated copy of `input` allocated
/// from `alloc`.
pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
    const owned = try alloc.dupeZ(u8, input);
    return Value{ .string = owned };
}
|
|
|
|
/// Shared constructor behind `fromScalar` / `fromString`: copies `input`
/// (0-terminated) with `alloc` and stores the copy in the union field named
/// by `classification`.
inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
    const field_name = comptime @tagName(classification);
    const owned = try alloc.dupeZ(u8, input);
    return @unionInit(Value, field_name, owned);
}
|
|
|
|
/// A `.scalar` value whose text is the empty string literal (no allocation).
pub inline fn emptyScalar() Value {
    return Value{ .scalar = "" };
}
|
|
|
|
/// A `.string` value whose text is the empty string literal (no allocation).
pub inline fn emptyString() Value {
    return Value{ .string = "" };
}
|
|
|
|
/// A `.list` value wrapping a new, empty `List` that will allocate from `alloc`.
pub inline fn newList(alloc: std.mem.Allocator) Value {
    const items = List.init(alloc);
    return Value{ .list = items };
}
|
|
|
|
/// An `.inline_list` value wrapping a new, empty `List` that will allocate
/// from `alloc`.
pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
    const items = List.init(alloc);
    return Value{ .inline_list = items };
}
|
|
|
|
/// A `.map` value wrapping a new, empty `Map` that will allocate from `alloc`.
pub inline fn newMap(alloc: std.mem.Allocator) Value {
    const entries = Map.init(alloc);
    return Value{ .map = entries };
}
|
|
|
|
/// An `.inline_map` value wrapping a new, empty `Map` that will allocate from
/// `alloc`.
pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
    const entries = Map.init(alloc);
    return Value{ .inline_map = entries };
}
|
|
|
|
/// Deep equality that also requires identical structure.
///
/// Two values are equal only when they carry the same tag (so `.scalar` is
/// never equal to `.string`, nor `.list` to `.inline_list`, nor `.map` to
/// `.inline_map`), their text is byte-identical, their lists have equal
/// items in the same positions, and their maps hold equal keys and values in
/// the same insertion order.
pub fn recursiveEqualsExact(self: Value, other: Value) bool {
    if (std.meta.activeTag(self) != std.meta.activeTag(other)) return false;

    switch (self) {
        inline .scalar, .string => |text, tag| {
            const other_text = @field(other, @tagName(tag));
            return std.mem.eql(u8, text, other_text);
        },
        inline .list, .inline_list => |list, tag| {
            const other_items = @field(other, @tagName(tag)).items;
            if (list.items.len != other_items.len) return false;

            for (list.items, other_items) |mine, theirs| {
                if (!mine.recursiveEqualsExact(theirs)) return false;
            }
            return true;
        },
        inline .map, .inline_map => |map, tag| {
            const other_map = @field(other, @tagName(tag));
            if (map.count() != other_map.count()) return false;

            // Walking the key/value slices in lockstep enforces that both
            // maps list their entries in the same order. The counts are
            // equal, so the slices have equal lengths.
            for (map.keys(), map.values(), other_map.keys(), other_map.values()) |key, value, other_key, other_value| {
                if (!std.mem.eql(u8, key, other_key)) return false;
                if (!value.recursiveEqualsExact(other_value)) return false;
            }
            return true;
        },
    }
}
|
|
|
|
/// Print this value and everything below it through `std.debug.print`,
/// starting at indentation 0 and finishing with a newline.
pub fn printDebug(self: Value) void {
    const top_level_indent: usize = 0;
    self.printRecursive(top_level_indent);
    std.debug.print("\n", .{});
}
|
|
|
|
/// Print this value through `std.debug.print`, using `indent` as the column
/// at which continuation lines and closing brackets are placed.
///
/// - Text without a newline is printed as-is; text containing newlines
///   starts on a fresh line and each of its lines is padded to `indent`.
/// - Lists print as `[ ... ]` with one `[index] = value,` line per item;
///   empty lists print as `[]`.
/// - Maps print as `{ ... }` with one `key: value,` line per entry;
///   empty maps print as `{}`.
fn printRecursive(self: Value, indent: usize) void {
    switch (self) {
        .scalar, .string => |text| {
            // Single-line text needs no padding at all.
            if (std.mem.indexOfScalar(u8, text, '\n') == null) {
                std.debug.print("{s}", .{text});
                return;
            }

            std.debug.print("\n", .{});
            var line_iter = std.mem.splitScalar(u8, text, '\n');
            while (line_iter.next()) |line| {
                // Every line except the last is followed by a newline.
                const terminator: []const u8 = if (line_iter.peek() == null) "" else "\n";
                std.debug.print(
                    "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                    .{ .empty = "", .indent = indent, .line = line, .nl = terminator },
                );
            }
        },
        .list, .inline_list => |list| {
            const items = list.items;
            if (items.len == 0) {
                std.debug.print("[]", .{});
                return;
            }

            std.debug.print("[\n", .{});
            var position: usize = 0;
            while (position < items.len) : (position += 1) {
                std.debug.print(
                    "{[empty]s: >[indent]}[{[idx]d}] = ",
                    .{ .empty = "", .indent = indent, .idx = position },
                );
                items[position].printRecursive(indent + 2);
                std.debug.print(",\n", .{});
            }
            std.debug.print("{[empty]s: >[indent]}]", .{ .empty = "", .indent = indent });
        },
        .map, .inline_map => |map| {
            if (map.count() == 0) {
                std.debug.print("{{}}", .{});
                return;
            }

            std.debug.print("{{\n", .{});
            // Entries are visited in the map's stored order.
            for (map.keys(), map.values()) |key, value| {
                std.debug.print(
                    "{[empty]s: >[indent]}{[key]s}: ",
                    .{ .empty = "", .indent = indent + 2, .key = key },
                );
                value.printRecursive(indent + 4);
                std.debug.print(",\n", .{});
            }
            std.debug.print("{[empty]s: >[indent]}}}", .{ .empty = "", .indent = indent });
        },
    }
}
|
|
};
|