Compare commits

..

3 Commits

Author SHA1 Message Date
3b68f1dc7a
config: add terminated strings
This was the final feature I wanted to add to the format. Also some
other things have been cleaned up a little bit (for example, the
inline parser does not need the dangling key to be attached to each
stack level just like the normal parser doesn't). There was also an
off-by-one error that bugged out detecting the pathological case of a
flow list consisting of only an empty string (`[ ]`, not to be
mistaken for the empty list `[]`).

Mixed multiline strings are a bit confusing but internally consistent.

    > what character does this string end with?
    |

ends with a newline character because that's the style of the
second-to-last line. However, seeing | last makes my brain think it
should end with a space. The reason it ends with a newline is because
our concatenation strategy consists of appending to the string early
(as soon as a line is added) rather than lazily. This is a tradeoff,
though. While lazy appending would make this result more intuitive
(the string would end with a space) and it would allow us to remove
the self-proclaimed cheesy hack, it would make the opposite boundary
condition confusing:

    >
    | what character does this string start with?

With lazy appending, this string would start with a space
(despite > making it look like it should have a leading
newline). While both of these are likely to be uncommon edge cases, it
doesn't seem we can have it both ways. Of the two options, I think the
current logic is a little bit more clear.
2023-09-17 23:09:26 -07:00
8b5a0114ef
config: allow nested flow structures
This was kind of a pain in the butt to implement because it basically
required a second full state machine parser (though this one is a bit
simpler since there are fewer possible value types). It seems likely to
me that I will probably shove this directly into the main parser
struct at some point in the near future.
2023-09-17 19:47:18 -07:00
ec875ef1f7
config: fix several things
There was no actual check that lines weren't being indented too far.
Inline strings weren't having their trailing newline get chopped.
Printing is still janky, but it's better than it was.
2023-09-17 19:30:58 -07:00

View File

@ -1,7 +1,7 @@
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
//
// - Doesn't support multiline keys (this means map keys cannot start with
// ' ', \t, #, {, [, or >, and they cannot contain :)
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
// - Allows using tabs for indentation (but not mixed tabs/spaces)
// - Indentation must be quantized consistently throughout the document. e.g.
// every nested layer being exactly 2 spaces past its parent. Tabs may
@ -54,6 +54,9 @@
// Like multiline strings, the final space is stripped (I guess this is a very
// janky way to add trailing whitespace to a string).
//
// - terminated strings to allow trailing whitespace:
// | this string has trailing whitespace |
// > and so does this one |
// - The parser is both strict and probably sloppy and may have weird edge
// cases since I'm slinging code, not writing a spec. For example, tabs are
// not trimmed from the values of inline lists/maps
@ -79,6 +82,7 @@ pub const LineTokenizer = struct {
BadToken,
MixedIndentation,
UnquantizedIndentation,
TooMuchIndentation,
MissingNewline,
TrailingWhitespace,
Impossible,
@ -93,10 +97,19 @@ pub const LineTokenizer = struct {
const InlineItem = union(enum) {
empty: void,
scalar: []const u8,
string: []const u8,
line_string: []const u8,
space_string: []const u8,
flow_list: []const u8,
flow_map: []const u8,
fn lineEnding(self: InlineItem) u8 {
return switch (self) {
.line_string => '\n',
.space_string => ' ',
else => unreachable,
};
}
};
const LineContents = union(enum) {
@ -199,9 +212,11 @@ pub const LineTokenizer = struct {
break :blk @divExact(indent, self.indentation.spaces);
} else indent;
const relative: RelativeIndent = if (quantized > self.last_indent)
.indent
else if (quantized < self.last_indent)
const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
if ((quantized - self.last_indent) > 1)
return error.TooMuchIndentation;
break :rel .indent;
} else if (quantized < self.last_indent)
.{ .dedent = self.last_indent - quantized }
else
.none;
@ -285,26 +300,38 @@ pub const LineTokenizer = struct {
if (buf.len == 0) return .empty;
switch (buf[0]) {
'|', '>' => |char| {
'>', '|' => |char| {
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
return if (buf.len == 1) .{
.string = if (char == '|') buf[1..] else buf.ptr[1 .. buf.len + 1],
} else .{
.string = if (char == '|') buf[2..] else buf.ptr[2 .. buf.len + 1],
const slice: []const u8 = switch (buf[buf.len - 1]) {
' ', '\t' => return error.TrailingWhitespace,
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
else => buf[@min(2, buf.len)..buf.len],
};
return if (char == '>')
.{ .line_string = slice }
else
.{ .space_string = slice };
},
'[' => {
if (buf.len < 2 or buf[buf.len - 1] != ']') return error.BadToken;
if (buf.len < 2 or buf[buf.len - 1] != ']')
return error.BadToken;
return .{ .flow_list = buf[1 .. buf.len - 1] };
// keep the closing ] for the flow parser
return .{ .flow_list = buf[1..] };
},
'{' => {
if (buf.len < 2 or buf[buf.len - 1] != '}') return error.BadToken;
if (buf.len < 2 or buf[buf.len - 1] != '}')
return error.BadToken;
return .{ .flow_map = buf[1 .. buf.len - 1] };
// keep the closing } for the flow parser
return .{ .flow_map = buf[1..] };
},
else => {
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
return error.TrailingWhitespace;
return .{ .scalar = buf };
},
}
@ -341,7 +368,7 @@ pub const Parser = struct {
DuplicateKey,
BadMapEntry,
Fail,
} || LineTokenizer.Error || std.mem.Allocator.Error;
} || LineTokenizer.Error || FlowParser.Error || std.mem.Allocator.Error;
pub const DuplicateKeyBehavior = enum {
use_first,
@ -364,38 +391,59 @@ pub const Parser = struct {
list: List(Value),
map: Map(Value),
pub fn printDebug(self: Value) void {
return self.printRecursive(0);
pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
var res: Value = .{ .string = try std.ArrayList(u8).initCapacity(alloc, input.len) };
res.string.appendSliceAssumeCapacity(input);
return res;
}
pub inline fn newString(alloc: std.mem.Allocator) Value {
return .{ .string = std.ArrayList(u8).init(alloc) };
}
pub inline fn newList(alloc: std.mem.Allocator) Value {
return .{ .list = List(Value).init(alloc) };
}
pub inline fn newMap(alloc: std.mem.Allocator) Value {
return .{ .map = Map(Value).init(alloc) };
}
pub fn printDebug(self: Value) void {
self.printRecursive(0);
std.debug.print("\n", .{});
}
fn printRecursive(self: Value, indent: usize) void {
switch (self) {
.string => |str| {
var lines = std.mem.splitScalar(u8, str.items, '\n');
std.debug.print(
"{[line]s}{[nl]s}",
.{
.line = lines.first(),
.nl = if (lines.peek() == null) "" else "\n",
},
);
while (lines.next()) |line| {
std.debug.print(
"{[empty]s: >[indent]}{[line]s}{[nl]s}",
.{
.empty = "",
.indent = indent + 0,
.line = line,
.nl = if (lines.peek() == null) "" else "\n",
},
);
if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
var lines = std.mem.splitScalar(u8, str.items, '\n');
std.debug.print("\n", .{});
while (lines.next()) |line| {
std.debug.print(
"{[empty]s: >[indent]}{[line]s}{[nl]s}",
.{
.empty = "",
.indent = indent,
.line = line,
.nl = if (lines.peek() == null) "" else "\n",
},
);
}
} else {
std.debug.print("{s}", .{str.items});
}
},
.list => |list| {
std.debug.print(
"{[empty]s: >[indent]}[\n",
.{ .empty = "", .indent = indent },
);
for (list.items) |value| {
if (list.items.len == 0) {
std.debug.print("[]", .{});
return;
}
std.debug.print("[\n", .{});
for (list.items, 0..) |value, idx| {
std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
value.printRecursive(indent + 2);
std.debug.print(",\n", .{});
}
@ -405,10 +453,12 @@ pub const Parser = struct {
);
},
.map => |map| {
std.debug.print(
"{[empty]s: >[indent]}{{\n",
.{ .empty = "", .indent = indent },
);
if (map.count() == 0) {
std.debug.print("{{}}", .{});
return;
}
std.debug.print("{{\n", .{});
var iter = map.iterator();
@ -458,7 +508,7 @@ pub const Parser = struct {
var state: ParseState = .initial;
var expect_shift: LineTokenizer.ShiftDirection = .none;
var empty_key: ?[]const u8 = null;
var dangling_key: ?[]const u8 = null;
var stack = std.ArrayList(*Value).init(arena_alloc);
defer stack.deinit();
@ -482,21 +532,26 @@ pub const Parser = struct {
.empty => unreachable,
.scalar => |str| {
document.root = try valueFromString(arena_alloc, str);
// this is a cheesy hack. If the document consists
// solely of a scalar, the finalizer will try to
// chop a line ending off of it, so we need to add
// a sacrificial padding character to avoid
// chopping off something that matters.
try document.root.string.append(' ');
state = .done;
},
.string => |str| {
.line_string, .space_string => |str| {
document.root = try valueFromString(arena_alloc, str);
// cheesy technique for differentiating the different string types
if (str[str.len - 1] != '\n') try document.root.string.append(' ');
try document.root.string.append(in_line.lineEnding());
try stack.append(&document.root);
state = .value;
},
.flow_list => |str| {
document.root = try parseFlowList(arena_alloc, str);
document.root = try parseFlowList(arena_alloc, str, self.dupe_behavior);
state = .done;
},
.flow_map => |str| {
document.root = try self.parseFlowMap(arena_alloc, str);
document.root = try parseFlowMap(arena_alloc, str, self.dupe_behavior);
state = .done;
},
},
@ -509,16 +564,16 @@ pub const Parser = struct {
expect_shift = .indent;
state = .value;
},
.string, .scalar => |str| {
.line_string, .space_string, .scalar => |str| {
try document.root.list.append(try valueFromString(arena_alloc, str));
state = .value;
},
.flow_list => |str| {
try document.root.list.append(try parseFlowList(arena_alloc, str));
try document.root.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior));
state = .value;
},
.flow_map => |str| {
try document.root.list.append(try self.parseFlowMap(arena_alloc, str));
try document.root.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior));
state = .value;
},
}
@ -536,21 +591,21 @@ pub const Parser = struct {
// key somewhere until we can consume the
// value. More parser state to lug along.
empty_key = pair.key;
dangling_key = pair.key;
state = .value;
},
.string, .scalar => |str| {
.line_string, .space_string, .scalar => |str| {
// we can do direct puts here because this is
// the very first line of the document
try document.root.map.put(pair.key, try valueFromString(arena_alloc, str));
state = .value;
},
.flow_list => |str| {
try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str));
try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior));
state = .value;
},
.flow_map => |str| {
try document.root.map.put(pair.key, try self.parseFlowMap(arena_alloc, str));
try document.root.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior));
state = .value;
},
}
@ -559,9 +614,12 @@ pub const Parser = struct {
},
.value => switch (stack.getLast().*) {
.string => |*string| {
if (line.indent == .indent) return error.UnexpectedIndent;
if (line.indent == .indent)
return error.UnexpectedIndent;
if (!flop and line.indent == .dedent) {
// TODO: remove final newline or trailing space here
// kick off the last trailing space or newline
_ = string.pop();
var dedent_depth = line.indent.dedent;
while (dedent_depth > 0) : (dedent_depth -= 1)
@ -574,9 +632,9 @@ pub const Parser = struct {
.comment => unreachable,
.in_line => |in_line| switch (in_line) {
.empty => unreachable,
.string => |str| {
.line_string, .space_string => |str| {
try string.appendSlice(str);
if (str[str.len - 1] != '\n') try string.append(' ');
try string.append(in_line.lineEnding());
},
else => return error.UnexpectedValue,
},
@ -584,14 +642,21 @@ pub const Parser = struct {
}
},
.list => |*list| {
// detect that the previous item was actually empty
//
// -
// - something
//
// the first line here creates the expect_shift, but the second line
// is a valid continuation of the list despite not being indented
if (expect_shift == .indent and line.indent != .indent)
try list.append(try valueFromString(arena_alloc, ""));
// Consider:
//
// -
// own-line scalar
// - inline scalar
// -
// own-line scalar
// - inline scalar
//
// the own-line scalar will not push the stack but the next list item will be a dedent
if (!flop and line.indent == .dedent) {
@ -618,14 +683,13 @@ pub const Parser = struct {
switch (in_line) {
.empty => unreachable,
.scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try list.append(try self.parseFlowMap(arena_alloc, str)),
.string => |str| {
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.line_string, .space_string => |str| {
// string pushes the stack
const new_string = try appendListGetValue(list, try valueFromString(arena_alloc, str));
if (str[str.len - 1] != '\n')
try new_string.string.append(' ');
try new_string.string.append(in_line.lineEnding());
try stack.append(new_string);
expect_shift = .none;
@ -639,9 +703,9 @@ pub const Parser = struct {
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.scalar, .string => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try list.append(try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
}
},
// a new list is being created
@ -655,9 +719,9 @@ pub const Parser = struct {
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.scalar, .string => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_list.list.append(try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
}
},
}
@ -681,24 +745,32 @@ pub const Parser = struct {
switch (pair.val) {
.empty => {
empty_key = pair.key;
dangling_key = pair.key;
expect_shift = .indent;
},
.scalar, .string => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_map.map.put(pair.key, try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
}
},
}
},
.map => |*map| {
// detect that the previous item was actually empty
//
// foo:
// bar: baz
//
// the first line here creates the expect_shift, but the second line
// is a valid continuation of the map despite not being indented
if (expect_shift == .indent and line.indent != .indent) {
try self.putMapKey(
try putMap(
map,
empty_key orelse return error.Fail,
dangling_key orelse return error.Fail,
try valueFromString(arena_alloc, ""),
self.dupe_behavior,
);
empty_key = null;
dangling_key = null;
}
if (!flop and line.indent == .dedent) {
@ -715,28 +787,28 @@ pub const Parser = struct {
.in_line => |in_line| {
// assert that this line has been indented. this is required for an inline value when
// the stack is in map mode.
if (expect_shift != .indent or line.indent != .indent or empty_key == null)
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
return error.UnexpectedValue;
expect_shift = .dedent;
switch (in_line) {
.empty => unreachable,
.scalar => |str| try self.putMapKey(map, empty_key.?, try valueFromString(arena_alloc, str)),
.flow_list => |str| try self.putMapKey(map, empty_key.?, try parseFlowList(arena_alloc, str)),
.scalar => |str| try putMap(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior),
.flow_list => |str| try putMap(map, dangling_key.?, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
.flow_map => |str| {
try self.putMapKey(map, empty_key.?, try self.parseFlowMap(arena_alloc, str));
try putMap(map, dangling_key.?, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior);
},
.string => |str| {
.line_string, .space_string => |str| {
// string pushes the stack
const new_string = try self.putMapKeyGetValue(map, empty_key.?, try valueFromString(arena_alloc, str));
if (str[str.len - 1] != '\n') try new_string.string.append(' ');
const new_string = try putMapGetValue(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior);
try new_string.string.append(in_line.lineEnding());
try stack.append(new_string);
expect_shift = .none;
},
}
empty_key = null;
dangling_key = null;
},
.list_item => |value| {
// this prong cannot be hit on dedent in a valid way.
@ -747,19 +819,19 @@ pub const Parser = struct {
//
// dedenting back to the map stack level requires map_item
if (expect_shift != .indent or line.indent != .indent or empty_key == null)
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
return error.UnexpectedValue;
const new_list = try self.putMapKeyGetValue(map, empty_key.?, .{ .list = List(Value).init(arena_alloc) });
const new_list = try putMapGetValue(map, dangling_key.?, .{ .list = List(Value).init(arena_alloc) }, self.dupe_behavior);
try stack.append(new_list);
empty_key = null;
dangling_key = null;
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.scalar, .string => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_list.list.append(try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
}
},
.map_item => |pair| {
@ -769,28 +841,28 @@ pub const Parser = struct {
.none, .dedent => switch (pair.val) {
.empty => {
expect_shift = .indent;
empty_key = pair.key;
dangling_key = pair.key;
},
.scalar, .string => |str| try self.putMapKey(map, pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try self.putMapKey(map, pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try self.putMapKey(map, pair.key, try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try putMap(map, pair.key, try valueFromString(arena_alloc, str), self.dupe_behavior),
.flow_list => |str| try putMap(map, pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
.flow_map => |str| try putMap(map, pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
},
// a new map is being created
.indent => {
if (expect_shift != .indent or empty_key == null) return error.UnexpectedValue;
if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;
const new_map = try self.putMapKeyGetValue(map, empty_key.?, .{ .map = Map(Value).init(arena_alloc) });
const new_map = try putMapGetValue(map, dangling_key.?, .{ .map = Map(Value).init(arena_alloc) }, self.dupe_behavior);
try stack.append(new_map);
empty_key = null;
dangling_key = null;
switch (pair.val) {
.empty => {
expect_shift = .indent;
empty_key = pair.key;
dangling_key = pair.key;
},
.scalar, .string => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_map.map.put(pair.key, try self.parseFlowMap(arena_alloc, str)),
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
}
},
}
@ -816,11 +888,11 @@ pub const Parser = struct {
},
.value => switch (stack.getLast().*) {
// remove the final trailing newline or space
.string => |*string| _ = string.pop(),
.string => |*string| _ = string.popOrNull(),
// if we have a dangling -, attach an empty string to it
.list => |*list| if (expect_shift == .indent) try list.append(try valueFromString(arena_alloc, "")),
// if we have a dangling key:, attach an empty string to it
.map => |*map| if (empty_key) |ek| try self.putMapKey(map, ek, try valueFromString(arena_alloc, "")),
// if we have a dangling "key:", attach an empty string to it
.map => |*map| if (dangling_key) |dk| try putMap(map, dk, try valueFromString(arena_alloc, ""), self.dupe_behavior),
},
.done => {},
}
@ -834,42 +906,18 @@ pub const Parser = struct {
return result;
}
fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8) Error!Value {
// TODO: if we pass in the parse stack, is it straightforward to support nested
// lists/maps? Can seek the split iterator by manually setting index.
var result: Value = .{ .list = List(Value).init(alloc) };
fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
var parser = try FlowParser.initList(alloc, contents);
defer parser.deinit();
// TODO: consume exactly one space after the comma
var splitter = std.mem.splitScalar(u8, contents, ',');
while (splitter.next()) |entry| {
try result.list.append(
try valueFromString(alloc, std.mem.trim(u8, entry, " ")),
);
}
return result;
return try parser.parse(dupe_behavior);
}
fn parseFlowMap(self: *Parser, alloc: std.mem.Allocator, contents: []const u8) Error!Value {
var result: Value = .{ .map = Map(Value).init(alloc) };
fn parseFlowMap(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
var parser = try FlowParser.initMap(alloc, contents);
defer parser.deinit();
var splitter = std.mem.splitScalar(u8, contents, ',');
while (splitter.next()) |entry| {
const trimmed = std.mem.trim(u8, entry, " ");
// TODO: consume exactly one space after the colon?
const colon = std.mem.indexOfScalar(u8, trimmed, ':') orelse
return error.BadMapEntry;
try self.putMapKey(
&result.map,
trimmed[0..colon],
try valueFromString(alloc, std.mem.trimLeft(u8, trimmed[colon + 1 .. trimmed.len], " ")),
);
}
return result;
return try parser.parse(dupe_behavior);
}
inline fn appendListGetValue(list: *List(Value), value: Value) Error!*Value {
@ -877,15 +925,15 @@ pub const Parser = struct {
return &list.items[list.items.len - 1];
}
inline fn putMapKey(self: *Parser, map: *Map(Value), key: []const u8, value: Value) Error!void {
_ = try self.putMapKeyGetValue(map, key, value);
inline fn putMap(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void {
_ = try putMapGetValue(map, key, value, dupe_behavior);
}
inline fn putMapKeyGetValue(self: *Parser, map: *Map(Value), key: []const u8, value: Value) Error!*Value {
inline fn putMapGetValue(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value {
const gop = try map.getOrPut(key);
if (gop.found_existing)
switch (self.dupe_behavior) {
switch (dupe_behavior) {
.fail => return error.DuplicateKey,
.use_first => {},
.use_last => gop.value_ptr.* = value,
@ -939,3 +987,306 @@ pub const Parser = struct {
});
}
};
/// Character-at-a-time state machine parser for single-line "flow" values,
/// i.e. `[a, b, [c]]` style lists and `{k: v, n: {x: y}}` style maps, including
/// nested flow lists and maps.
///
/// The buffer is expected to begin just after the opening bracket of the root
/// container and to still contain its closing bracket: `parse` only reaches
/// the `done` state when the root container is closed by a `]` or `}`.
pub const FlowParser = struct {
    pub const Value = Parser.Value;

    /// One level of container nesting that is currently being filled in.
    const FlowStackItem = struct {
        /// The list or map being populated at this level.
        value: *Value,
        // lists need this. maps do also for keys and values.
        /// Index into `buffer` where the scalar currently being consumed starts.
        item_start: usize = 0,
    };

    const FlowStack: type = std.ArrayList(FlowStackItem);

    /// The text being parsed. Scalars are handed to the value constructors as
    /// sub-slices of this buffer.
    buffer: []const u8,
    /// The root container. Only valid once `parse` has been called.
    root: Value,
    alloc: std.mem.Allocator,
    /// Open containers, innermost last.
    stack: FlowStack,
    state: ParseState,

    // make this an ugly state machine parser
    const ParseState = enum {
        want_list_item,
        consuming_list_item,
        want_list_separator,
        want_map_key,
        consuming_map_key,
        want_map_value,
        consuming_map_value,
        want_map_separator,
        done,
    };

    const Error = error{
        BadState,
        BadToken,
    } || std.mem.Allocator.Error;

    /// Creates a parser whose root value will be a list.
    ///
    /// The stack is initialized here (empty, no allocation) rather than left
    /// undefined so that `deinit` is always safe to call, even if `parse` was
    /// never called or failed before priming the stack.
    pub fn initList(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
        return .{
            .buffer = buffer,
            .root = undefined,
            .alloc = alloc,
            .stack = FlowStack.init(alloc),
            .state = .want_list_item,
        };
    }

    /// Creates a parser whose root value will be a map.
    ///
    /// See `initList` for why the stack is initialized eagerly.
    pub fn initMap(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
        return .{
            .buffer = buffer,
            .root = undefined,
            .alloc = alloc,
            .stack = FlowStack.init(alloc),
            .state = .want_map_key,
        };
    }

    /// Releases the nesting stack. The parsed value itself is not freed here.
    pub fn deinit(self: *FlowParser) void {
        self.stack.deinit();
    }

    /// Returns a pointer to the innermost open container's stack entry, or
    /// `error.BadState` if the stack is empty. The pointer is invalidated by
    /// any subsequent append to the stack.
    inline fn getStackTip(stack: FlowStack) Error!*FlowStackItem {
        if (stack.items.len == 0) return error.BadState;
        return &stack.items[stack.items.len - 1];
    }

    /// Records where the scalar currently being consumed begins for the
    /// innermost open container. Fails with `error.BadState` on an empty stack.
    inline fn setStackItemStart(stack: FlowStack, start: usize) Error!void {
        if (stack.items.len == 0) return error.BadState;
        stack.items[stack.items.len - 1].item_start = start;
    }

    /// Closes the innermost open container. `idx` is the buffer index of the
    /// closing bracket.
    ///
    /// If the container is a list, any pending final item is flushed first.
    /// Afterwards the state becomes the parent's "want separator" state, or
    /// `done` if the root container was just closed.
    inline fn popStack(self: *FlowParser, idx: usize) Parser.Error!void {
        const finished = self.stack.popOrNull() orelse return error.BadState;

        if (finished.value.* == .list) {
            // this is not valid if we are in the want_list_separator state because
            // there is no trailing comma in that state
            if (self.state == .want_list_item and (finished.value.list.items.len > 0 or idx > finished.item_start))
                // either a trailing comma or whitespace-only contents (`[ ]`):
                // the final item is an empty string
                try finished.value.list.append(
                    try Parser.valueFromString(self.alloc, ""),
                )
            else if (self.state == .consuming_list_item)
                try finished.value.list.append(
                    try Parser.valueFromString(
                        self.alloc,
                        self.buffer[finished.item_start..idx],
                    ),
                );
        }

        const parent = self.stack.getLastOrNull() orelse {
            self.state = .done;
            return;
        };

        switch (parent.value.*) {
            .list => self.state = .want_list_separator,
            .map => self.state = .want_map_separator,
            else => return error.BadState,
        }
    }

    /// Parses the whole buffer and returns the root list or map.
    ///
    /// `dupe_behavior` is forwarded to the map insertion helpers to decide
    /// what happens when a map key is repeated.
    ///
    /// Returns `error.BadToken` for a character that is not permitted in the
    /// current state, and `error.BadState` if the parser is not in one of its
    /// two initial states, if characters remain after the root container is
    /// closed, or if the buffer ends while a container is still open.
    pub fn parse(self: *FlowParser, dupe_behavior: Parser.DuplicateKeyBehavior) Parser.Error!Value {
        // prime the stack:
        self.root = switch (self.state) {
            .want_list_item => Value.newList(self.alloc),
            .want_map_key => Value.newMap(self.alloc),
            else => return error.BadState,
        };
        self.stack.clearRetainingCapacity();
        try self.stack.append(.{ .value = &self.root });

        // the most recently parsed map key that has not been given a value yet
        var dangling_key: ?[]const u8 = null;

        charloop: for (self.buffer, 0..) |char, idx| {
            // std.debug.print("{s} => {c}\n", .{ @tagName(self.state), char });
            switch (self.state) {
                .want_list_item => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // empty value
                        const tip = try getStackTip(self.stack);
                        try tip.value.list.append(try Value.fromString(self.alloc, ""));
                        tip.item_start = idx + 1;
                    },
                    '{' => {
                        const tip = try getStackTip(self.stack);

                        const new_map = try Parser.appendListGetValue(
                            &tip.value.list,
                            Value.newMap(self.alloc),
                        );

                        tip.item_start = idx;
                        // tip must not be used past this point: the append may
                        // reallocate the stack
                        try self.stack.append(.{ .value = new_map });
                        self.state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(self.stack);

                        const new_list = try Parser.appendListGetValue(
                            &tip.value.list,
                            Value.newList(self.alloc),
                        );

                        tip.item_start = idx;
                        // tip must not be used past this point: the append may
                        // reallocate the stack
                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_list_item;
                    },
                },
                .consuming_list_item => switch (char) {
                    ',' => {
                        const tip = try getStackTip(self.stack);

                        try tip.value.list.append(
                            try Value.fromString(self.alloc, self.buffer[tip.item_start..idx]),
                        );
                        tip.item_start = idx + 1;

                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => continue :charloop,
                },
                .want_list_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => return error.BadToken,
                },
                .want_map_key => switch (char) {
                    ' ', '\t' => continue :charloop,
                    // forbid these characters so that flow dictionary keys cannot start
                    // with characters that regular dictionary keys cannot start with
                    // (even though they're unambiguous in this specific context).
                    '{', '[', '#', '>', '|', ',' => return error.BadToken,
                    ':' => {
                        // we have an empty map key
                        dangling_key = "";
                        self.state = .want_map_value;
                    },
                    '}' => try self.popStack(idx),
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_map_key;
                    },
                },
                .consuming_map_key => switch (char) {
                    ':' => {
                        const tip = try getStackTip(self.stack);
                        dangling_key = self.buffer[tip.item_start..idx];

                        self.state = .want_map_value;
                    },
                    else => continue :charloop,
                },
                .want_map_value => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // the value is an empty string
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, ""),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        self.state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(self.stack);

                        // NOTE(review): when the key is a duplicate and the
                        // duplicate behavior keeps the first value, the
                        // returned pointer presumably refers to the
                        // pre-existing value, which may not be an empty
                        // list. Confirm against putMapGetValue.
                        const new_list = try Parser.putMapGetValue(
                            &tip.value.map,
                            dangling_key.?,
                            Value.newList(self.alloc),
                            dupe_behavior,
                        );

                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
                        dangling_key = null;
                        self.state = .want_list_item;
                    },
                    '{' => {
                        const tip = try getStackTip(self.stack);

                        // NOTE(review): same duplicate-key caveat as the
                        // nested list case above. Confirm.
                        const new_map = try Parser.putMapGetValue(
                            &tip.value.map,
                            dangling_key.?,
                            Value.newMap(self.alloc),
                            dupe_behavior,
                        );

                        try self.stack.append(.{ .value = new_map });
                        dangling_key = null;
                        self.state = .want_map_key;
                    },
                    '}' => {
                        // the value is an empty string and this map is closed
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, ""),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        try self.popStack(idx);
                    },
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_map_value;
                    },
                },
                .consuming_map_value => switch (char) {
                    ',', '}' => |term| {
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, self.buffer[tip.item_start..idx]),
                            dupe_behavior,
                        );
                        dangling_key = null;
                        self.state = .want_map_key;
                        if (term == '}') try self.popStack(idx);
                    },
                    else => continue :charloop,
                },
                .want_map_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => self.state = .want_map_key,
                    '}' => try self.popStack(idx),
                    else => return error.BadToken,
                },
                // the root value was closed but there are characters remaining
                // in the buffer
                .done => return error.BadState,
            }
        }
        // we ran out of characters while still in the middle of an object
        if (self.state != .done) return error.BadState;

        return self.root;
    }
};