Compare commits

..

No commits in common. "1683197bc0ae77e0cee56557085719544933b24a" and "0028092a4eb07c9e1eab802c7ef1d122b53902c0" have entirely different histories.

3 changed files with 35 additions and 384 deletions

View File

@ -3,8 +3,7 @@ const std = @import("std");
const buffers = @import("./linebuffer.zig");
const tokenizer = @import("./tokenizer.zig");
const State = @import("./parser/state.zig").State;
pub const Document = @import("./parser/value.zig").Document;
pub const Parsed = @import("./parser/value.zig").Parsed;
pub const Document = @import("./parser/state.zig").Document;
pub const Value = @import("./parser/value.zig").Value;
pub const Diagnostics = struct {
@ -41,62 +40,9 @@ pub const Options = struct {
// resulting document root object. The default behavior is to emit an error if the
// document is empty.
default_object: enum { string, list, map, fail } = .fail,
// Only used by the parseTo family of functions.
// If false, and a mapping contains additional keys that do not map to the fields of
// the corresponding object, an error will be raised. By default, additional keys
// will be skipped and no error will be raised. Note that tagged unions must be
// represented by a map with a single key, and having more than one key will always
// be an error, even if this option is set to true.
ignore_extra_fields: bool = true,
// Only used by the parseTo family of functions.
// If true, if a struct field is an optional type and the corresponding mapping key
// does not exist, the object field will be set to `null`. By default, if the
// parsed document is missing a mapping key for a given field, an error will be
// raised instead.
treat_omitted_as_null: bool = false,
// Only used by the parseTo family of functions.
// If true, strings may be coerced into other scalar types, like booleans or
// numbers. By default, only document scalar fields will attempt to coerce to
// non-string values.
coerce_strings: bool = false,
// Only used by the parseTo family of functions.
// Two lists of strings. Truthy strings will be parsed to boolean true. Falsy
// strings will be parsed to boolean false. All other strings will raise an
// error.
boolean_strings: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
.truthy = &.{ "true", "True", "yes", "on" },
.falsy = &.{ "false", "False", "no", "off" },
},
null_strings: []const []const u8 = &.{ "null", "nil", "None" },
// Only used by the parseTo family of functions.
// If true, document scalars that appear to be numbers will attempt to convert into
// enum values as an integer. By default, all enums in the document must be
// specified by name, not by numeric value. Note that conversion by name will always
// be tried first, even if this option is enabled, so if you're stupid enough to do:
//
// const Horrible = enum {
// @"1" = 0,
// @"0" = 1,
// };
//
// then you deserve what you get. And what you'll get is confusing results.
// Also note that this option does not apply to tagged unions, despite those being
// backed by possibly ordered enums.
allow_numeric_enums: bool = false,
};
pub fn parseBuffer(
allocator: std.mem.Allocator,
buffer: []const u8,
diagnostics: *Diagnostics,
options: Options,
) !Document {
pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, diagnostics: *Diagnostics, options: Options) !Document {
var state = State.init(allocator, diagnostics);
defer state.deinit();
errdefer state.document.deinit();
@ -113,17 +59,6 @@ pub fn parseBuffer(
return try state.finish(options);
}
/// Parse `buffer` as a complete document and convert its root value into a `T`.
///
/// `allocator` backs the document arena created by `parseBuffer`; `diagnostics`
/// and `options` are forwarded to `parseBuffer`, and `options` is also used for
/// the conversion step. On success the returned `Parsed(T)` carries the arena,
/// so the caller releases everything with a single `deinit` on the result.
///
/// Returns any error produced by `parseBuffer` or by `Document.convertTo`.
pub fn parseBufferTo(
    comptime T: type,
    allocator: std.mem.Allocator,
    buffer: []const u8,
    diagnostics: *Diagnostics,
    options: Options,
) !Parsed(T) {
    var doc = try parseBuffer(allocator, buffer, diagnostics, options);
    // If conversion fails nothing else holds the document, so its arena must
    // be released here; on success the arena is handed over to the result.
    errdefer doc.deinit();
    return try doc.convertTo(T, options);
}
pub const StreamParser = struct {
linetok: tokenizer.LineTokenizer(buffers.ValidatingLineBuffer),
parse_state: State,

View File

@ -5,9 +5,28 @@ const Error = @import("../parser.zig").Error;
const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
const Options = @import("../parser.zig").Options;
const Diagnostics = @import("../parser.zig").Diagnostics;
const Document = @import("./value.zig").Document;
const Value = @import("./value.zig").Value;
/// A parsed document: the root value together with the arena that it is
/// allocated from.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create a document whose arena is backed by `alloc`. `root` is left
    /// undefined and must be assigned before it is read.
    pub fn init(alloc: std.mem.Allocator) Document {
        const backing_arena = std.heap.ArenaAllocator.init(alloc);
        return Document{
            .root = undefined,
            .arena = backing_arena,
        };
    }

    /// Forward to the root value's debug printer.
    pub fn printDebug(self: Document) void {
        self.root.printDebug();
    }

    /// Release the arena held by this document.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};
const FlowParseState = enum {
want_list_item,
consuming_list_item,
@ -42,7 +61,7 @@ pub const State = struct {
self.value_stack.deinit();
}
pub fn finish(state: *State, options: Options) !Document {
pub fn finish(state: *State, options: Options) Error!Document {
const arena_alloc = state.document.arena.allocator();
switch (state.mode) {
@ -76,7 +95,7 @@ pub const State = struct {
return state.document;
}
pub fn parseLine(state: *State, line: tokenizer.Line, dkb: DuplicateKeyBehavior) !void {
pub fn parseLine(state: *State, line: tokenizer.Line, dkb: DuplicateKeyBehavior) Error!void {
if (line.contents == .comment) return;
// this gives us a second loop when the stack tip changes (i.e. during dedent or
@ -425,7 +444,7 @@ pub const State = struct {
contents: []const u8,
root_type: Value.TagType,
dkb: DuplicateKeyBehavior,
) !Value {
) Error!Value {
const arena_alloc = state.document.arena.allocator();
var root: Value = switch (root_type) {
@ -443,7 +462,7 @@ pub const State = struct {
else => unreachable,
};
// used to distinguish between [] and [ ], and it also tracks
// used to distinguish between [] and [ ], and it also tracks
// a continuous value between different states
var item_start: usize = 0;
var dangling_key: ?[]const u8 = null;
@ -455,7 +474,6 @@ pub const State = struct {
' ', '\t' => continue :charloop,
',' => {
// empty value
// don't check for whitespace here: [ , ] is okay, as is [ , , ]
const tip = try state.getStackTip();
try tip.flow_list.append(Value.newScalar(arena_alloc));
item_start = idx + 1;
@ -500,18 +518,9 @@ pub const State = struct {
},
},
.consuming_list_item => switch (char) {
// consider: detecting trailing whitespace. "[ 1 ]" should
// produce "1" and not "1 " as it currently does, which breaks
// the principle of least astonishment. design: no trailing
// whitespace before "," and only a single space is allowed before "]"
',' => {
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow list contains whitespace before ,";
return error.TrailingWhitespace;
}
const tip = try state.getStackTip();
try tip.flow_list.append(
try Value.fromScalar(arena_alloc, contents[item_start..idx]),
);
@ -520,23 +529,13 @@ pub const State = struct {
pstate = .want_list_item;
},
']' => {
var end = idx;
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow list contains extra whitespace before ]";
return error.TrailingWhitespace;
}
end = idx - 1;
}
const finished = state.value_stack.getLastOrNull() orelse {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow list was closed too many times";
return error.BadState;
};
try finished.flow_list.append(
try Value.fromScalar(arena_alloc, contents[item_start..end]),
try Value.fromScalar(arena_alloc, contents[item_start..idx]),
);
pstate = try state.popFlowStack();
},
@ -578,11 +577,6 @@ pub const State = struct {
},
.consuming_map_key => switch (char) {
':' => {
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow map contains whitespace before :";
return error.TrailingWhitespace;
}
dangling_key = try arena_alloc.dupe(u8, contents[item_start..idx]);
pstate = .want_map_value;
},
@ -650,12 +644,7 @@ pub const State = struct {
},
},
.consuming_map_value => switch (char) {
',' => {
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow map contains whitespace before ,";
return error.TrailingWhitespace;
}
',', '}' => |term| {
const tip = try state.getStackTip();
try state.putMap(
&tip.flow_map,
@ -665,27 +654,7 @@ pub const State = struct {
);
dangling_key = null;
pstate = .want_map_key;
},
'}' => {
var end = idx;
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) {
state.diagnostics.length = 1;
state.diagnostics.message = "the flow map contains extra whitespace before }";
return error.TrailingWhitespace;
}
end = idx - 1;
}
const tip = try state.getStackTip();
try state.putMap(
&tip.flow_map,
dangling_key.?,
try Value.fromScalar(arena_alloc, contents[item_start..end]),
dkb,
);
dangling_key = null;
pstate = try state.popFlowStack();
if (term == '}') pstate = try state.popFlowStack();
},
else => continue :charloop,
},
@ -718,7 +687,7 @@ pub const State = struct {
return root;
}
inline fn getStackTip(state: State) !*Value {
inline fn getStackTip(state: State) Error!*Value {
if (state.value_stack.items.len == 0) return {
state.diagnostics.length = 1;
state.diagnostics.message = "this document contains an unexpected bottom of the stack";
@ -727,7 +696,7 @@ pub const State = struct {
return state.value_stack.items[state.value_stack.items.len - 1];
}
inline fn popFlowStack(state: *State) !FlowParseState {
inline fn popFlowStack(state: *State) Error!FlowParseState {
if (state.value_stack.popOrNull() == null) {
state.diagnostics.length = 1;
state.diagnostics.message = "this document contains an unexpected bottom of the stack";
@ -742,16 +711,16 @@ pub const State = struct {
};
}
inline fn appendListGetValue(list: *Value.List, value: Value) !*Value {
inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value {
try list.append(value);
return &list.items[list.items.len - 1];
}
inline fn putMap(state: *State, map: *Value.Map, key: []const u8, value: Value, dkb: DuplicateKeyBehavior) !void {
inline fn putMap(state: *State, map: *Value.Map, key: []const u8, value: Value, dkb: DuplicateKeyBehavior) Error!void {
_ = try state.putMapGetValue(map, key, value, dkb);
}
inline fn putMapGetValue(state: *State, map: *Value.Map, key: []const u8, value: Value, dkb: DuplicateKeyBehavior) !*Value {
inline fn putMapGetValue(state: *State, map: *Value.Map, key: []const u8, value: Value, dkb: DuplicateKeyBehavior) Error!*Value {
const gop = try map.getOrPut(key);
if (gop.found_existing)

View File

@ -1,45 +1,5 @@
const std = @import("std");
const Options = @import("../parser.zig").Options;
/// A parsed document: the root value together with the arena that it is
/// allocated from.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create a document whose arena is backed by `alloc`. `root` is left
    /// undefined and must be assigned before it is read.
    pub fn init(alloc: std.mem.Allocator) Document {
        const backing_arena = std.heap.ArenaAllocator.init(alloc);
        return Document{
            .root = undefined,
            .arena = backing_arena,
        };
    }

    /// Convert the root value into a `T`, allocating from this document's
    /// arena, and return it bundled with a copy of that arena.
    pub fn convertTo(self: *Document, comptime T: type, options: Options) !Parsed(T) {
        // Convert first: the arena is copied by value into the result, so the
        // copy has to be taken after the conversion has finished allocating.
        const converted = try self.root.convertTo(T, self.arena.allocator(), options);
        return Parsed(T){
            .value = converted,
            .arena = self.arena,
        };
    }

    /// Forward to the root value's debug printer.
    pub fn printDebug(self: Document) void {
        self.root.printDebug();
    }

    /// Release the arena held by this document.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};
/// Build the result type for a typed parse: a converted `T` paired with the
/// arena that is stored alongside it.
pub fn Parsed(comptime T: type) type {
    return struct {
        value: T,
        arena: std.heap.ArenaAllocator,

        const Self = @This();

        /// Release the arena stored in this result.
        pub fn deinit(self: Self) void {
            self.arena.deinit();
        }
    };
}
pub const Value = union(enum) {
pub const String = std.ArrayList(u8);
pub const Map = std.StringArrayHashMap(Value);
@ -53,219 +13,6 @@ pub const Value = union(enum) {
map: Map,
flow_map: Map,
/// Convert this parsed value into an instance of `T`, dispatching on
/// `@typeInfo(T)` and recursing into child types.
///
/// Supported targets and the document shapes they accept:
///   * void: an empty scalar (or an empty string when `coerce_strings` is set).
///   * bool: a scalar matching one of `options.boolean_strings`.
///   * integers / floats: a scalar parsed with `std.fmt.parseInt` (base
///     auto-detected) or `std.fmt.parseFloat`.
///   * slices: a list, or a scalar/string when the child type is `u8`. The
///     `u8` case returns the value's own storage without copying.
///   * single-item pointers: the pointee is allocated with `allocator`.
///   * arrays: a list of exactly `len` items, or a scalar/string of exactly
///     `len` bytes when the child type is `u8`.
///   * structs: a map keyed by field name; tuples take a list of matching length.
///   * enums: a scalar naming a tag, or its integer value when
///     `options.allow_numeric_enums` is set.
///   * tagged unions: a map with exactly one key naming the active field.
///   * optionals: a scalar matching `options.null_strings` becomes `null`;
///     anything else is converted as the child type.
///
/// Structs, enums and unions that declare `deserializeNice` are handed the
/// value directly instead. Quoted strings are only coerced to non-string
/// scalar types when `options.coerce_strings` is set.
///
/// Returns `error.BadValue` when the value does not fit `T`, and propagates
/// errors from integer/float parsing and from `allocator`.
pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
    switch (@typeInfo(T)) {
        .Void => {
            switch (self) {
                .scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
                .string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
                else => return error.BadValue,
            }
        },
        .Bool => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    for (options.boolean_strings.truthy) |check|
                        if (std.mem.eql(u8, str.items, check)) return true;
                    for (options.boolean_strings.falsy) |check|
                        if (std.mem.eql(u8, str.items, check)) return false;

                    return error.BadValue;
                },
                else => return error.BadValue,
            }
        },
        .Int, .ComptimeInt => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    // base 0 lets parseInt pick the radix from the literal's prefix
                    return try std.fmt.parseInt(T, str.items, 0);
                },
                else => return error.BadValue,
            }
        },
        .Float, .ComptimeFloat => {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    // parseFloat takes no radix argument
                    return try std.fmt.parseFloat(T, str.items);
                },
                else => return error.BadValue,
            }
        },
        .Pointer => |ptr| switch (ptr.size) {
            .Slice => {
                // TODO: There is ambiguity here because a document expecting a list
                //       of u8 could parse a string instead. Introduce a special
                //       type to use for this? the problem is that it becomes
                //       invasive into downstream code. Ultimately this should
                //       probably be solved in the zig stdlib or similar.
                // TODO: This also doesn't handle sentinels properly.
                switch (self) {
                    // note: this aliases the value's own buffer; no copy is made
                    .scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
                    .list, .flow_list => |lst| {
                        var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
                        errdefer result.deinit();
                        for (lst.items) |item| {
                            result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
                        }
                        return result.toOwnedSlice();
                    },
                    else => return error.BadValue,
                }
            },
            .One => {
                const result = try allocator.create(ptr.child);
                errdefer allocator.destroy(result);
                result.* = try self.convertTo(ptr.child, allocator, options);
                return result;
            },
            // many-item and C pointers are not supported
            else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)),
        },
        .Array => |arr| {
            // TODO: There is ambiguity here because a document expecting a list
            //       of u8 could parse a string instead. Introduce a special
            //       type to use for this? the problem is that it becomes
            //       invasive into downstream code. Ultimately this should
            //       probably be solved in the zig stdlib or similar.
            // TODO: This also doesn't handle sentinels properly.
            switch (self) {
                .scalar, .string => |str| {
                    if (arr.child == u8 and str.items.len == arr.len) {
                        var result: T = undefined;
                        @memcpy(&result, str.items);
                        return result;
                    } else return error.BadValue;
                },
                .list, .flow_list => |lst| {
                    // A fixed-size array can only be filled from a list of
                    // exactly the same length; anything else is a bad value
                    // rather than an out-of-bounds write or a short copy.
                    if (lst.items.len != arr.len) return error.BadValue;

                    // this may result in a big stack allocation, which is not ideal
                    var result: T = undefined;
                    for (lst.items, 0..) |item, idx| {
                        result[idx] = try item.convertTo(arr.child, allocator, options);
                    }
                    return result;
                },
                else => return error.BadValue,
            }
        },
        .Struct => |stt| {
            if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                return T.deserializeNice(self, allocator, options);

            if (stt.is_tuple) {
                switch (self) {
                    .list, .flow_list => |list| {
                        if (list.items.len != stt.fields.len) return error.BadValue;
                        var result: T = undefined;
                        inline for (stt.fields, 0..) |field, idx| {
                            result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
                        }
                        return result;
                    },
                    else => return error.BadValue,
                }
            }

            switch (self) {
                .map, .flow_map => |map| {
                    var result: T = undefined;
                    // Number of map keys consumed by struct fields. Field names
                    // and map keys are each unique, so any difference from
                    // map.count() means the map holds keys that match no field.
                    var matched: usize = 0;

                    inline for (stt.fields) |field| {
                        if (map.get(field.name)) |value| {
                            @field(result, field.name) = try value.convertTo(field.type, allocator, options);
                            matched += 1;
                        } else if (@typeInfo(field.type) == .Optional and options.treat_omitted_as_null) {
                            @field(result, field.name) = null;
                        } else {
                            return error.BadValue;
                        }
                    }

                    // there were extra fields in the data
                    if (!options.ignore_extra_fields and matched != map.count()) return error.BadValue;

                    return result;
                },
                else => return error.BadValue,
            }
        },
        .Enum => {
            if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                return T.deserializeNice(self, allocator, options);

            switch (self) {
                inline .scalar, .string => |str, tag| {
                    if (tag == .string and !options.coerce_strings) return error.BadValue;
                    // lookup by name always wins over lookup by number
                    if (std.meta.stringToEnum(T, str.items)) |value| return value;
                    if (options.allow_numeric_enums) {
                        const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
                            return error.BadValue;
                        return std.meta.intToEnum(T, parsed) catch error.BadValue;
                    }
                    return error.BadValue;
                },
                else => return error.BadValue,
            }
        },
        .Union => |unn| {
            if (comptime std.meta.trait.hasFn("deserializeNice")(T))
                return T.deserializeNice(self, allocator, options);

            if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));

            switch (self) {
                .map, .flow_map => |map| {
                    // a union may not ever be deserialized from a map with more than one value
                    if (map.count() != 1) return error.BadValue;
                    const key = map.keys()[0];
                    inline for (unn.fields) |field| {
                        if (std.mem.eql(u8, key, field.name))
                            return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
                    }
                    return error.BadValue;
                },
                // TODO: if the field is a 0 width type like void, we could parse it
                //       directly from a scalar/string value (i.e. a name with no
                //       corresponding value)
                else => return error.BadValue,
            }
        },
        .Optional => |opt| {
            switch (self) {
                inline .scalar, .string => |str, tag| {
                    // Only bare scalars (or strings, when coercion is enabled)
                    // may spell null. A quoted string that is not coerced is
                    // still a valid candidate for the child type, e.g. ?[]const u8.
                    if (tag == .scalar or options.coerce_strings) {
                        for (options.null_strings) |check|
                            if (std.mem.eql(u8, str.items, check)) return null;
                    }

                    return try self.convertTo(opt.child, allocator, options);
                },
                // lists and maps can never spell null, so they must convert as
                // the child type (e.g. ?Struct or ?[]T)
                else => return try self.convertTo(opt.child, allocator, options),
            }
        },
        else => @compileError("Cannot deserialize into unsupported type " ++ @typeName(T)),
    }
}
/// Build a `Value` from `input`, tagged as `.scalar`, by delegating to
/// `_fromScalarOrString` with `alloc`.
pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
    const scalar_value = try _fromScalarOrString(alloc, .scalar, input);
    return scalar_value;
}