parser: change string and | semantics and expose slices in Value
The way I implemented these changes ended up being directly coupled, and I am not interested in trying to decouple them, so instead here's a single commit that changes both the API and the format. Let's go over these.

| now acts as a direct concatenation operator rather than concatenating with a space. This is because the format allows a trailing space to be specified (by using | to fence the string just before the newline). It's now possible to spread a long string without spaces over multiple lines, which couldn't be done before. The downside is that the common pattern of concatenating strings with a space now requires some extra trailing line noise. I may introduce a THIRD type of concatenating string (thinking of using + as the prefix) because I am a jerk. We will see.

The way multi-line strings are concatenated has also changed. Partially this is to simplify the implementation of the change above: the parser forgets the string type from the tokenizer, which worked before because there was always a trailing character that could be popped off, but since one type now appends no character, the type would have to be tracked through parsing to know whether a character needs to be popped at the end. But I was also not terribly satisfied with the previous semantics of multiline strings. I wrote several words about this in 429734e6e813b225654aa71c283f4a8b4444609f, where I reached the opposite conclusion from what is implemented in this commit. Basically, when different types of string concatenation are mixed, the results may be surprising. The previous approach appended the line terminator at the end of the specified line; the new approach prepends the line terminator at the beginning of the specified line. Since the specifier character is at the beginning of the line, I feel this reads a little better simply due to the colocation of information. As an example:

> first
| second
> third

would previously have resulted in "first\nsecondthird", but it will now result in "firstsecond\nthird". The only mildly baffling part is that the string signifier on the first line has absolutely no impact on the string. In the old design, it was the last line that had no impact.

Finally, this commit changes Value to store strings as []const u8 slices directly rather than as ArrayLists. Everything downstream of the value was just reaching into string.items to access the slice, so cut out the middleman. It was also unintuitive to access a field named .string and get an arraylist rather than a slice.
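To make the new concatenation rule concrete, here is a minimal Zig sketch of the behavior described above. Fragment and joinFragments are hypothetical names used only for illustration; the real parser accumulates the pieces in State.string_builder rather than through a helper like this.

const std = @import("std");

// Hypothetical helper types for illustration only.
const Fragment = struct {
    // true for a '>' (line_string) fragment, false for a '|' (concat_string) one
    line: bool,
    text: []const u8,
};

// Sketch of the new rule: a '>' fragment prepends '\n' before its own text,
// a '|' fragment prepends nothing, and the prefix on the very first fragment
// has no effect.
fn joinFragments(alloc: std.mem.Allocator, fragments: []const Fragment) ![]u8 {
    var builder = std.ArrayListUnmanaged(u8){};
    errdefer builder.deinit(alloc);

    for (fragments, 0..) |frag, idx| {
        if (idx != 0 and frag.line) try builder.append(alloc, '\n');
        try builder.appendSlice(alloc, frag.text);
    }
    return builder.toOwnedSlice(alloc);
}

test "the example from the commit message" {
    const joined = try joinFragments(std.testing.allocator, &.{
        .{ .line = true, .text = "first" },
        .{ .line = false, .text = "second" },
        .{ .line = true, .text = "third" },
    });
    defer std.testing.allocator.free(joined);
    try std.testing.expectEqualStrings("firstsecond\nthird", joined);
}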
parent 7db6094dd5
commit 8dd5463683
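For downstream code, the practical effect of the Value change is that .scalar and .string are now plain slices. A reduced sketch follows; this two-variant union is an assumption for illustration, while the real Value in the diff below also carries list, flow_list, map, and flow_map variants.

const std = @import("std");

// Illustrative, cut-down Value: the string-bearing variants now hold a
// []const u8 slice, so consumers work with the slice directly instead of
// reaching through an ArrayList's `.items` field.
const Value = union(enum) {
    scalar: []const u8,
    string: []const u8,

    fn eqlExact(self: Value, other: Value) bool {
        if (std.meta.activeTag(self) != std.meta.activeTag(other)) return false;
        return switch (self) {
            inline .scalar, .string => |str, tag| std.mem.eql(u8, str, @field(other, @tagName(tag))),
        };
    }
};

test "string variants are plain slices" {
    const parsed: Value = .{ .string = "firstsecond\nthird" };
    // no more `.string.items`; the field itself is the slice
    try std.testing.expectEqualStrings("firstsecond\nthird", parsed.string);
    try std.testing.expect(parsed.eqlExact(.{ .string = "firstsecond\nthird" }));
}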
@@ -26,6 +26,7 @@ pub const State = struct {
 document: Document,
 diagnostics: *Diagnostics,
 value_stack: Stack,
+string_builder: std.ArrayListUnmanaged(u8),
 mode: enum { initial, value, done } = .initial,
 expect_shift: tokenizer.ShiftDirection = .none,
 dangling_key: ?[]const u8 = null,
@@ -35,6 +36,7 @@ pub const State = struct {
 .document = Document.init(allocator),
 .diagnostics = diagnostics,
 .value_stack = Stack.init(allocator),
+.string_builder = std.ArrayListUnmanaged(u8){},
 };
 }

@@ -47,7 +49,7 @@ pub const State = struct {

 switch (state.mode) {
 .initial => switch (options.default_object) {
-.string => state.document.root = Value.newString(arena_alloc),
+.string => state.document.root = Value.emptyString(),
 .list => state.document.root = Value.newList(arena_alloc),
 .map => state.document.root = Value.newMap(arena_alloc),
 .fail => {
@@ -58,14 +60,14 @@ pub const State = struct {
 },
 .value => switch (state.value_stack.getLast().*) {
-// remove the final trailing newline or space
-.string => |*string| _ = string.popOrNull(),
-// if we have a dangling -, attach an empty string to it
-.list => |*list| if (state.expect_shift == .indent) try list.append(Value.newScalar(arena_alloc)),
-// if we have a dangling "key:", attach an empty string to it
+.string => |*string| string.* = try state.string_builder.toOwnedSlice(arena_alloc),
+// if we have a dangling -, attach an empty scalar to it
+.list => |*list| if (state.expect_shift == .indent) try list.append(Value.emptyScalar()),
+// if we have a dangling "key:", attach an empty scalar to it
 .map => |*map| if (state.dangling_key) |dk| try state.putMap(
 map,
 dk,
-Value.newScalar(arena_alloc),
+Value.emptyScalar(),
 options.duplicate_key_behavior,
 ),
 .scalar, .flow_list, .flow_map => {},
@@ -102,9 +104,9 @@ pub const State = struct {
 state.document.root = try Value.fromScalar(arena_alloc, str);
 state.mode = .done;
 },
-.line_string, .space_string => |str| {
-state.document.root = try Value.fromString(arena_alloc, str);
-try state.document.root.string.append(in_line.lineEnding());
+.line_string, .concat_string => |str| {
+state.document.root = Value.emptyString();
+try state.string_builder.appendSlice(arena_alloc, str);
 try state.value_stack.append(&state.document.root);
 state.mode = .value;
 },
@@ -126,7 +128,7 @@ pub const State = struct {
 switch (value) {
 .empty => state.expect_shift = .indent,
 .scalar => |str| try rootlist.append(try Value.fromScalar(arena_alloc, str)),
-.line_string, .space_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)),
+.line_string, .concat_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)),
 .flow_list => |str| try rootlist.append(try state.parseFlow(str, .flow_list, dkb)),
 .flow_map => |str| try rootlist.append(try state.parseFlow(str, .flow_map, dkb)),
 }
@@ -144,7 +146,7 @@ pub const State = struct {
 state.dangling_key = dupekey;
 },
 .scalar => |str| try rootmap.put(dupekey, try Value.fromScalar(arena_alloc, str)),
-.line_string, .space_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)),
+.line_string, .concat_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)),
 .flow_list => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_list, dkb)),
 .flow_map => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_map, dkb)),
 }
@@ -163,13 +165,13 @@ pub const State = struct {
 .string => |*string| {
 if (line.shift == .indent) {
 state.diagnostics.length = 1;
-state.diagnostics.message = "the document contains an invalid indented line in a multiline string";
+state.diagnostics.message = "the document contains invalid indentation in a multiline string";
 return error.UnexpectedIndent;
 }

 if (firstpass and line.shift == .dedent) {
-// kick off the last trailing space or newline
-_ = string.pop();
+// copy the string into the document proper
+string.* = try state.string_builder.toOwnedSlice(arena_alloc);

 var dedent_depth = line.shift.dedent;
 while (dedent_depth > 0) : (dedent_depth -= 1)
@@ -182,9 +184,10 @@ pub const State = struct {
 .comment => unreachable,
 .in_line => |in_line| switch (in_line) {
 .empty => unreachable,
-.line_string, .space_string => |str| {
-try string.appendSlice(str);
-try string.append(in_line.lineEnding());
+inline .line_string, .concat_string => |str, tag| {
+if (tag == .line_string)
+try state.string_builder.append(arena_alloc, '\n');
+try state.string_builder.appendSlice(arena_alloc, str);
 },
 else => {
 state.diagnostics.length = 1;
@@ -208,7 +211,7 @@ pub const State = struct {
 // the first line here creates the state.expect_shift, but the second line
 // is a valid continuation of the list despite not being indented
 if (firstpass and (state.expect_shift == .indent and line.shift != .indent))
-try list.append(Value.newScalar(arena_alloc));
+try list.append(Value.emptyScalar());

 // Consider:
 //
@@ -245,9 +248,9 @@ pub const State = struct {
 .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
 .flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)),
 .flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)),
-.line_string, .space_string => |str| {
-const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str));
-try new_string.string.append(in_line.lineEnding());
+.line_string, .concat_string => |str| {
+const new_string = try appendListGetValue(list, Value.emptyString());
+try state.string_builder.appendSlice(arena_alloc, str);
 try state.value_stack.append(new_string);
 state.expect_shift = .none;
 },
@@ -259,7 +262,7 @@ pub const State = struct {
 switch (value) {
 .empty => state.expect_shift = .indent,
 .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
-.line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
+.line_string, .concat_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
 .flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)),
 .flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)),
 }
@@ -311,7 +314,7 @@ pub const State = struct {
 state.diagnostics.message = "the document is somehow missing a key (this shouldn't be possible)";
 return error.Fail;
 },
-Value.newScalar(arena_alloc),
+Value.emptyScalar(),
 dkb,
 );
 state.dangling_key = null;
@@ -346,10 +349,10 @@ pub const State = struct {
 .flow_map => |str| {
 try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .flow_map, dkb), dkb);
 },
-.line_string, .space_string => |str| {
+.line_string, .concat_string => |str| {
 // string pushes the stack
-const new_string = try state.putMapGetValue(map, state.dangling_key.?, try Value.fromString(arena_alloc, str), dkb);
-try new_string.string.append(in_line.lineEnding());
+const new_string = try state.putMapGetValue(map, state.dangling_key.?, Value.emptyString(), dkb);
+try state.string_builder.appendSlice(arena_alloc, str);
 try state.value_stack.append(new_string);
 state.expect_shift = .none;
 },
@@ -388,7 +391,7 @@ pub const State = struct {
 state.dangling_key = dupekey;
 },
 .scalar => |str| try state.putMap(map, dupekey, try Value.fromScalar(arena_alloc, str), dkb),
-.line_string, .space_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb),
+.line_string, .concat_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb),
 .flow_list => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_list, dkb), dkb),
 .flow_map => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_map, dkb), dkb),
 }
@@ -457,7 +460,7 @@ pub const State = struct {
 ',' => {
 // empty value
 const tip = try state.getStackTip();
-try tip.flow_list.append(Value.newScalar(arena_alloc));
+try tip.flow_list.append(Value.emptyScalar());
 item_start = idx + 1;
 },
 '{' => {
@@ -491,7 +494,7 @@ pub const State = struct {
 return error.BadState;
 };
 if (finished.flow_list.items.len > 0 or idx > item_start)
-try finished.flow_list.append(Value.newScalar(arena_alloc));
+try finished.flow_list.append(Value.emptyScalar());
 pstate = try state.popFlowStack();
 },
 else => {
@@ -599,7 +602,7 @@ pub const State = struct {
 try state.putMap(
 &tip.flow_map,
 dangling_key.?,
-Value.newScalar(arena_alloc),
+Value.emptyScalar(),
 dkb,
 );
@@ -641,7 +644,7 @@ pub const State = struct {
 try state.putMap(
 &tip.flow_map,
 dangling_key.?,
-Value.newScalar(arena_alloc),
+Value.emptyScalar(),
 dkb,
 );
@@ -41,7 +41,7 @@ pub fn Parsed(comptime T: type) type {
 }

 pub const Value = union(enum) {
-pub const String = std.ArrayList(u8);
+pub const String = []const u8;
 pub const Map = std.StringArrayHashMap(Value);
 pub const List = std.ArrayList(Value);
 pub const TagType = @typeInfo(Value).Union.tag_type.?;
@@ -57,8 +57,8 @@ pub const Value = union(enum) {
 switch (@typeInfo(T)) {
 .Void => {
 switch (self) {
-.scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
-.string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
+.scalar => |str| return if (str.len == 0) void{} else error.BadValue,
+.string => |str| return if (options.coerce_strings and str.len == 0) void{} else error.BadValue,
 else => return error.BadValue,
 }
 },
@@ -67,9 +67,9 @@ pub const Value = union(enum) {
 inline .scalar, .string => |str, tag| {
 if (tag == .string and !options.coerce_strings) return error.BadValue;
 for (options.boolean_strings.truthy) |check|
-if (std.mem.eql(u8, str.items, check)) return true;
+if (std.mem.eql(u8, str, check)) return true;
 for (options.boolean_strings.falsy) |check|
-if (std.mem.eql(u8, str.items, check)) return false;
+if (std.mem.eql(u8, str, check)) return false;

 return error.BadValue;
 },
@@ -80,8 +80,7 @@ pub const Value = union(enum) {
 switch (self) {
 inline .scalar, .string => |str, tag| {
 if (tag == .string and !options.coerce_strings) return error.BadValue;
-std.debug.print("'{s}'\n", .{str.items});
-return try std.fmt.parseInt(T, str.items, 0);
+return try std.fmt.parseInt(T, str, 0);
 },
 else => return error.BadValue,
 }
@@ -90,7 +89,7 @@ pub const Value = union(enum) {
 switch (self) {
 inline .scalar, .string => |str, tag| {
 if (tag == .string and !options.coerce_strings) return error.BadValue;
-return try std.fmt.parseFloat(T, str.items, 0);
+return try std.fmt.parseFloat(T, str, 0);
 },
 else => return error.BadValue,
 }
@@ -104,7 +103,7 @@ pub const Value = union(enum) {
 // probably be solved in the zig stdlib or similar.
 // TODO: This also doesn't handle sentinels properly.
 switch (self) {
-.scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
+.scalar, .string => |str| return if (ptr.child == u8) str else error.BadValue,
 .list, .flow_list => |lst| {
 var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
 errdefer result.deinit();
@@ -133,9 +132,9 @@ pub const Value = union(enum) {
 // TODO: This also doesn't handle sentinels properly.
 switch (self) {
 .scalar, .string => |str| {
-if (arr.child == u8 and str.items.len == arr.len) {
+if (arr.child == u8 and str.len == arr.len) {
 var result: T = undefined;
-@memcpy(&result, str.items);
+@memcpy(&result, str);
 return result;
 } else return error.BadValue;
 },
@@ -182,7 +181,6 @@ pub const Value = union(enum) {
 } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
 @field(result, field.name) = null;
 } else {
-std.debug.print("{s}\n", .{field.name});
 return error.BadValue;
 }
 }
@@ -216,9 +214,9 @@ pub const Value = union(enum) {
 switch (self) {
 inline .scalar, .string => |str, tag| {
 if (tag == .string and !options.coerce_strings) return error.BadValue;
-if (std.meta.stringToEnum(T, str.items)) |value| return value;
+if (std.meta.stringToEnum(T, str)) |value| return value;
 if (options.allow_numeric_enums) {
-const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
+const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str, 10) catch
 return error.BadValue;
 return std.meta.intToEnum(T, parsed) catch error.BadValue;
 }
@@ -255,7 +253,7 @@ pub const Value = union(enum) {
 inline .scalar, .string => |str, tag| {
 if (tag == .string and !options.coerce_strings) return error.BadValue;
 for (options.null_strings) |check|
-if (std.mem.eql(u8, str.items, check)) return null;
+if (std.mem.eql(u8, str, check)) return null;

 return try self.convertTo(opt.child, allocator, options);
 },
@@ -275,17 +273,15 @@ pub const Value = union(enum) {
 }

 inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
-var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
-@field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
-return res;
+return @unionInit(Value, @tagName(classification), try alloc.dupe(u8, input));
 }

-pub inline fn newScalar(alloc: std.mem.Allocator) Value {
-return .{ .scalar = String.init(alloc) };
+pub inline fn emptyScalar() Value {
+return .{ .scalar = "" };
 }

-pub inline fn newString(alloc: std.mem.Allocator) Value {
-return .{ .string = String.init(alloc) };
+pub inline fn emptyString() Value {
+return .{ .string = "" };
 }

 pub inline fn newList(alloc: std.mem.Allocator) Value {
@@ -307,7 +303,7 @@ pub const Value = union(enum) {
 pub fn recursiveEqualsExact(self: Value, other: Value) bool {
 if (@as(TagType, self) != other) return false;
 switch (self) {
-inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items),
+inline .scalar, .string => |str, tag| return std.mem.eql(u8, str, @field(other, @tagName(tag))),
 inline .list, .flow_list => |lst, tag| {
 const olst = @field(other, @tagName(tag));

@@ -341,8 +337,8 @@ pub const Value = union(enum) {
 fn printRecursive(self: Value, indent: usize) void {
 switch (self) {
 .scalar, .string => |str| {
-if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
-var lines = std.mem.splitScalar(u8, str.items, '\n');
+if (std.mem.indexOfScalar(u8, str, '\n')) |_| {
+var lines = std.mem.splitScalar(u8, str, '\n');
 std.debug.print("\n", .{});
 while (lines.next()) |line| {
 std.debug.print(
@@ -356,7 +352,7 @@ pub const Value = union(enum) {
 );
 }
 } else {
-std.debug.print("{s}", .{str.items});
+std.debug.print("{s}", .{str});
 }
 },
 .list, .flow_list => |list| {
@@ -23,18 +23,10 @@ pub const InlineItem = union(enum) {
 empty: void,
 scalar: []const u8,
 line_string: []const u8,
-space_string: []const u8,
+concat_string: []const u8,

 flow_list: []const u8,
 flow_map: []const u8,
-
-pub fn lineEnding(self: InlineItem) u8 {
-return switch (self) {
-.line_string => '\n',
-.space_string => ' ',
-else => unreachable,
-};
-}
 };

 pub const LineContents = union(enum) {
@@ -306,7 +298,7 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 return if (char == '>')
 .{ .line_string = slice }
 else
-.{ .space_string = slice };
+.{ .concat_string = slice };
 },
 '[' => {
 if (buf.len - start < 2 or buf[buf.len - 1] != ']') {