Compare commits

..

No commits in common. "3b68f1dc7a6ef07f00ac55337c6a9a8e708b174a" and "c8375d6d3ad807a7747e045736642c49451a4ba1" have entirely different histories.

View File

@ -1,7 +1,7 @@
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
//
// - Doesn't support multiline keys (this means map keys cannot start with
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
// ' ', \t, #, {, [, or >, and they cannot contain :)
// - Allows using tabs for indentation (but not mixed tabs/spaces)
// - Indentation must be quantized consistently throughout the document. e.g.
// every nested layer being exactly 2 spaces past its parent. Tabs may
@ -54,9 +54,6 @@
// Like multiline strings, the final space is stripped (I guess this is a very
// janky way to add trailing whitespace to a string).
//
// - terminated strings to allow trailing whitespace:
// | this string has trailing whitespace |
// > and so does this one |
// - The parser is both strict and probably sloppy and may have weird edge
// cases since I'm slinging code, not writing a spec. For example, tabs are
// not trimmed from the values of inline lists/maps
@ -82,7 +79,6 @@ pub const LineTokenizer = struct {
BadToken,
MixedIndentation,
UnquantizedIndentation,
TooMuchIndentation,
MissingNewline,
TrailingWhitespace,
Impossible,
@ -97,19 +93,10 @@ pub const LineTokenizer = struct {
// The value part of a tokenized line, tagged by the kind of token it was
// recognized as. Every payload is a slice of the line being tokenized.
const InlineItem = union(enum) {
// no value was present
empty: void,
// plain value that did not start with one of the special leading characters
scalar: []const u8,
// string introduced by '>'; successive lines are joined with '\n'
line_string: []const u8,
// string introduced by '|'; successive lines are joined with ' '
space_string: []const u8,
string: []const u8,
// contents of an inline `[...]` list
flow_list: []const u8,
// contents of an inline `{...}` map
flow_map: []const u8,
// Returns the byte appended after each line of a multi-line string: '\n'
// for `line_string` and ' ' for `space_string`. Only those two variants are
// valid receivers; any other variant reaches `unreachable`.
fn lineEnding(self: InlineItem) u8 {
return switch (self) {
.line_string => '\n',
.space_string => ' ',
else => unreachable,
};
}
};
const LineContents = union(enum) {
@ -212,11 +199,9 @@ pub const LineTokenizer = struct {
break :blk @divExact(indent, self.indentation.spaces);
} else indent;
const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
if ((quantized - self.last_indent) > 1)
return error.TooMuchIndentation;
break :rel .indent;
} else if (quantized < self.last_indent)
const relative: RelativeIndent = if (quantized > self.last_indent)
.indent
else if (quantized < self.last_indent)
.{ .dedent = self.last_indent - quantized }
else
.none;
@ -300,38 +285,26 @@ pub const LineTokenizer = struct {
if (buf.len == 0) return .empty;
switch (buf[0]) {
'>', '|' => |char| {
'|', '>' => |char| {
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
const slice: []const u8 = switch (buf[buf.len - 1]) {
' ', '\t' => return error.TrailingWhitespace,
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
else => buf[@min(2, buf.len)..buf.len],
return if (buf.len == 1) .{
.string = if (char == '|') buf[1..] else buf.ptr[1 .. buf.len + 1],
} else .{
.string = if (char == '|') buf[2..] else buf.ptr[2 .. buf.len + 1],
};
return if (char == '>')
.{ .line_string = slice }
else
.{ .space_string = slice };
},
'[' => {
if (buf.len < 2 or buf[buf.len - 1] != ']')
return error.BadToken;
if (buf.len < 2 or buf[buf.len - 1] != ']') return error.BadToken;
// keep the closing ] for the flow parser
return .{ .flow_list = buf[1..] };
return .{ .flow_list = buf[1 .. buf.len - 1] };
},
'{' => {
if (buf.len < 2 or buf[buf.len - 1] != '}')
return error.BadToken;
if (buf.len < 2 or buf[buf.len - 1] != '}') return error.BadToken;
// keep the closing } for the flow parser
return .{ .flow_map = buf[1..] };
return .{ .flow_map = buf[1 .. buf.len - 1] };
},
else => {
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
return error.TrailingWhitespace;
return .{ .scalar = buf };
},
}
@ -368,7 +341,7 @@ pub const Parser = struct {
DuplicateKey,
BadMapEntry,
Fail,
} || LineTokenizer.Error || FlowParser.Error || std.mem.Allocator.Error;
} || LineTokenizer.Error || std.mem.Allocator.Error;
pub const DuplicateKeyBehavior = enum {
use_first,
@ -391,59 +364,38 @@ pub const Parser = struct {
list: List(Value),
map: Map(Value),
/// Builds a `.string` value holding a copy of `input`.
///
/// The backing buffer is allocated from `alloc` with capacity for exactly
/// `input.len` bytes up front, so the copy itself cannot fail. Returns an
/// error only if that initial allocation fails.
pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
    var buffer = try std.ArrayList(u8).initCapacity(alloc, input.len);
    buffer.appendSliceAssumeCapacity(input);
    return .{ .string = buffer };
}
/// Creates an empty `.string` value whose buffer will grow using `alloc`.
pub inline fn newString(alloc: std.mem.Allocator) Value {
    const empty_buffer = std.ArrayList(u8).init(alloc);
    return .{ .string = empty_buffer };
}
/// Creates an empty `.list` value whose storage will grow using `alloc`.
pub inline fn newList(alloc: std.mem.Allocator) Value {
    const empty_list = List(Value).init(alloc);
    return .{ .list = empty_list };
}
/// Creates an empty `.map` value whose storage will grow using `alloc`.
pub inline fn newMap(alloc: std.mem.Allocator) Value {
    const empty_map = Map(Value).init(alloc);
    return .{ .map = empty_map };
}
pub fn printDebug(self: Value) void {
self.printRecursive(0);
std.debug.print("\n", .{});
return self.printRecursive(0);
}
fn printRecursive(self: Value, indent: usize) void {
switch (self) {
.string => |str| {
if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
var lines = std.mem.splitScalar(u8, str.items, '\n');
std.debug.print("\n", .{});
while (lines.next()) |line| {
std.debug.print(
"{[empty]s: >[indent]}{[line]s}{[nl]s}",
.{
.empty = "",
.indent = indent,
.line = line,
.nl = if (lines.peek() == null) "" else "\n",
},
);
}
} else {
std.debug.print("{s}", .{str.items});
var lines = std.mem.splitScalar(u8, str.items, '\n');
std.debug.print(
"{[line]s}{[nl]s}",
.{
.line = lines.first(),
.nl = if (lines.peek() == null) "" else "\n",
},
);
while (lines.next()) |line| {
std.debug.print(
"{[empty]s: >[indent]}{[line]s}{[nl]s}",
.{
.empty = "",
.indent = indent + 0,
.line = line,
.nl = if (lines.peek() == null) "" else "\n",
},
);
}
},
.list => |list| {
if (list.items.len == 0) {
std.debug.print("[]", .{});
return;
}
std.debug.print("[\n", .{});
for (list.items, 0..) |value, idx| {
std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
std.debug.print(
"{[empty]s: >[indent]}[\n",
.{ .empty = "", .indent = indent },
);
for (list.items) |value| {
value.printRecursive(indent + 2);
std.debug.print(",\n", .{});
}
@ -453,12 +405,10 @@ pub const Parser = struct {
);
},
.map => |map| {
if (map.count() == 0) {
std.debug.print("{{}}", .{});
return;
}
std.debug.print("{{\n", .{});
std.debug.print(
"{[empty]s: >[indent]}{{\n",
.{ .empty = "", .indent = indent },
);
var iter = map.iterator();
@ -508,7 +458,7 @@ pub const Parser = struct {
var state: ParseState = .initial;
var expect_shift: LineTokenizer.ShiftDirection = .none;
var dangling_key: ?[]const u8 = null;
var empty_key: ?[]const u8 = null;
var stack = std.ArrayList(*Value).init(arena_alloc);
defer stack.deinit();
@ -532,26 +482,21 @@ pub const Parser = struct {
.empty => unreachable,
.scalar => |str| {
document.root = try valueFromString(arena_alloc, str);
// this is a cheesy hack. If the document consists
// solely of a scalar, the finalizer will try to
// chop a line ending off of it, so we need to add
// a sacrificial padding character to avoid
// chopping off something that matters.
try document.root.string.append(' ');
state = .done;
},
.line_string, .space_string => |str| {
.string => |str| {
document.root = try valueFromString(arena_alloc, str);
try document.root.string.append(in_line.lineEnding());
// cheesy technique for differentiating the different string types
if (str[str.len - 1] != '\n') try document.root.string.append(' ');
try stack.append(&document.root);
state = .value;
},
.flow_list => |str| {
document.root = try parseFlowList(arena_alloc, str, self.dupe_behavior);
document.root = try parseFlowList(arena_alloc, str);
state = .done;
},
.flow_map => |str| {
document.root = try parseFlowMap(arena_alloc, str, self.dupe_behavior);
document.root = try self.parseFlowMap(arena_alloc, str);
state = .done;
},
},
@ -564,16 +509,16 @@ pub const Parser = struct {
expect_shift = .indent;
state = .value;
},
.line_string, .space_string, .scalar => |str| {
.string, .scalar => |str| {
try document.root.list.append(try valueFromString(arena_alloc, str));
state = .value;
},
.flow_list => |str| {
try document.root.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior));
try document.root.list.append(try parseFlowList(arena_alloc, str));
state = .value;
},
.flow_map => |str| {
try document.root.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior));
try document.root.list.append(try self.parseFlowMap(arena_alloc, str));
state = .value;
},
}
@ -591,21 +536,21 @@ pub const Parser = struct {
// key somewhere until we can consume the
// value. More parser state to lug along.
dangling_key = pair.key;
empty_key = pair.key;
state = .value;
},
.line_string, .space_string, .scalar => |str| {
.string, .scalar => |str| {
// we can do direct puts here because this is
// the very first line of the document
try document.root.map.put(pair.key, try valueFromString(arena_alloc, str));
state = .value;
},
.flow_list => |str| {
try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior));
try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str));
state = .value;
},
.flow_map => |str| {
try document.root.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior));
try document.root.map.put(pair.key, try self.parseFlowMap(arena_alloc, str));
state = .value;
},
}
@ -614,12 +559,9 @@ pub const Parser = struct {
},
.value => switch (stack.getLast().*) {
.string => |*string| {
if (line.indent == .indent)
return error.UnexpectedIndent;
if (line.indent == .indent) return error.UnexpectedIndent;
if (!flop and line.indent == .dedent) {
// kick off the last trailing space or newline
_ = string.pop();
// TODO: remove final newline or trailing space here
var dedent_depth = line.indent.dedent;
while (dedent_depth > 0) : (dedent_depth -= 1)
@ -632,9 +574,9 @@ pub const Parser = struct {
.comment => unreachable,
.in_line => |in_line| switch (in_line) {
.empty => unreachable,
.line_string, .space_string => |str| {
.string => |str| {
try string.appendSlice(str);
try string.append(in_line.lineEnding());
if (str[str.len - 1] != '\n') try string.append(' ');
},
else => return error.UnexpectedValue,
},
@ -642,21 +584,14 @@ pub const Parser = struct {
}
},
.list => |*list| {
// detect that the previous item was actually empty
//
// -
// - something
//
// the first line here creates the expect_shift, but the second line
// is a valid continuation of the list despite not being indented
if (expect_shift == .indent and line.indent != .indent)
try list.append(try valueFromString(arena_alloc, ""));
// Consider:
//
// -
// own-line scalar
// - inline scalar
// -
// own-line scalar
// - inline scalar
//
// the own-line scalar will not push the stack but the next list item will be a dedent
if (!flop and line.indent == .dedent) {
@ -683,13 +618,14 @@ pub const Parser = struct {
switch (in_line) {
.empty => unreachable,
.scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.line_string, .space_string => |str| {
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try list.append(try self.parseFlowMap(arena_alloc, str)),
.string => |str| {
// string pushes the stack
const new_string = try appendListGetValue(list, try valueFromString(arena_alloc, str));
try new_string.string.append(in_line.lineEnding());
if (str[str.len - 1] != '\n')
try new_string.string.append(' ');
try stack.append(new_string);
expect_shift = .none;
@ -703,9 +639,9 @@ pub const Parser = struct {
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.line_string, .space_string, .scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.scalar, .string => |str| try list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try list.append(try self.parseFlowMap(arena_alloc, str)),
}
},
// a new list is being created
@ -719,9 +655,9 @@ pub const Parser = struct {
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.scalar, .string => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_list.list.append(try self.parseFlowMap(arena_alloc, str)),
}
},
}
@ -745,32 +681,24 @@ pub const Parser = struct {
switch (pair.val) {
.empty => {
dangling_key = pair.key;
empty_key = pair.key;
expect_shift = .indent;
},
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.scalar, .string => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_map.map.put(pair.key, try self.parseFlowMap(arena_alloc, str)),
}
},
}
},
.map => |*map| {
// detect that the previous item was actually empty
//
// foo:
// bar: baz
//
// the first line here creates the expect_shift, but the second line
// is a valid continuation of the map despite not being indented
if (expect_shift == .indent and line.indent != .indent) {
try putMap(
try self.putMapKey(
map,
dangling_key orelse return error.Fail,
empty_key orelse return error.Fail,
try valueFromString(arena_alloc, ""),
self.dupe_behavior,
);
dangling_key = null;
empty_key = null;
}
if (!flop and line.indent == .dedent) {
@ -787,28 +715,28 @@ pub const Parser = struct {
.in_line => |in_line| {
// assert that this line has been indented. this is required for an inline value when
// the stack is in map mode.
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
if (expect_shift != .indent or line.indent != .indent or empty_key == null)
return error.UnexpectedValue;
expect_shift = .dedent;
switch (in_line) {
.empty => unreachable,
.scalar => |str| try putMap(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior),
.flow_list => |str| try putMap(map, dangling_key.?, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
.scalar => |str| try self.putMapKey(map, empty_key.?, try valueFromString(arena_alloc, str)),
.flow_list => |str| try self.putMapKey(map, empty_key.?, try parseFlowList(arena_alloc, str)),
.flow_map => |str| {
try putMap(map, dangling_key.?, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior);
try self.putMapKey(map, empty_key.?, try self.parseFlowMap(arena_alloc, str));
},
.line_string, .space_string => |str| {
.string => |str| {
// string pushes the stack
const new_string = try putMapGetValue(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior);
try new_string.string.append(in_line.lineEnding());
const new_string = try self.putMapKeyGetValue(map, empty_key.?, try valueFromString(arena_alloc, str));
if (str[str.len - 1] != '\n') try new_string.string.append(' ');
try stack.append(new_string);
expect_shift = .none;
},
}
dangling_key = null;
empty_key = null;
},
.list_item => |value| {
// this prong cannot be hit on dedent in a valid way.
@ -819,19 +747,19 @@ pub const Parser = struct {
//
// dedenting back to the map stack level requires map_item
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
if (expect_shift != .indent or line.indent != .indent or empty_key == null)
return error.UnexpectedValue;
const new_list = try putMapGetValue(map, dangling_key.?, .{ .list = List(Value).init(arena_alloc) }, self.dupe_behavior);
const new_list = try self.putMapKeyGetValue(map, empty_key.?, .{ .list = List(Value).init(arena_alloc) });
try stack.append(new_list);
dangling_key = null;
empty_key = null;
expect_shift = .none;
switch (value) {
.empty => expect_shift = .indent,
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.scalar, .string => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_list.list.append(try self.parseFlowMap(arena_alloc, str)),
}
},
.map_item => |pair| {
@ -841,28 +769,28 @@ pub const Parser = struct {
.none, .dedent => switch (pair.val) {
.empty => {
expect_shift = .indent;
dangling_key = pair.key;
empty_key = pair.key;
},
.line_string, .space_string, .scalar => |str| try putMap(map, pair.key, try valueFromString(arena_alloc, str), self.dupe_behavior),
.flow_list => |str| try putMap(map, pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
.flow_map => |str| try putMap(map, pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
.scalar, .string => |str| try self.putMapKey(map, pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try self.putMapKey(map, pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try self.putMapKey(map, pair.key, try self.parseFlowMap(arena_alloc, str)),
},
// a new map is being created
.indent => {
if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;
if (expect_shift != .indent or empty_key == null) return error.UnexpectedValue;
const new_map = try putMapGetValue(map, dangling_key.?, .{ .map = Map(Value).init(arena_alloc) }, self.dupe_behavior);
const new_map = try self.putMapKeyGetValue(map, empty_key.?, .{ .map = Map(Value).init(arena_alloc) });
try stack.append(new_map);
dangling_key = null;
empty_key = null;
switch (pair.val) {
.empty => {
expect_shift = .indent;
dangling_key = pair.key;
empty_key = pair.key;
},
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
.scalar, .string => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str)),
.flow_map => |str| try new_map.map.put(pair.key, try self.parseFlowMap(arena_alloc, str)),
}
},
}
@ -888,11 +816,11 @@ pub const Parser = struct {
},
.value => switch (stack.getLast().*) {
// remove the final trailing newline or space
.string => |*string| _ = string.popOrNull(),
.string => |*string| _ = string.pop(),
// if we have a dangling -, attach an empty string to it
.list => |*list| if (expect_shift == .indent) try list.append(try valueFromString(arena_alloc, "")),
// if we have a dangling "key:", attach an empty string to it
.map => |*map| if (dangling_key) |dk| try putMap(map, dk, try valueFromString(arena_alloc, ""), self.dupe_behavior),
// if we have a dangling key:, attach an empty string to it
.map => |*map| if (empty_key) |ek| try self.putMapKey(map, ek, try valueFromString(arena_alloc, "")),
},
.done => {},
}
@ -906,18 +834,42 @@ pub const Parser = struct {
return result;
}
fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
var parser = try FlowParser.initList(alloc, contents);
defer parser.deinit();
fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8) Error!Value {
// TODO: if we pass in the parse stack, is it straightforward to support nested
// lists/maps? Can seek the split iterator by manually setting index.
var result: Value = .{ .list = List(Value).init(alloc) };
return try parser.parse(dupe_behavior);
// TODO: consume exactly one space after the comma
var splitter = std.mem.splitScalar(u8, contents, ',');
while (splitter.next()) |entry| {
try result.list.append(
try valueFromString(alloc, std.mem.trim(u8, entry, " ")),
);
}
return result;
}
fn parseFlowMap(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
var parser = try FlowParser.initMap(alloc, contents);
defer parser.deinit();
fn parseFlowMap(self: *Parser, alloc: std.mem.Allocator, contents: []const u8) Error!Value {
var result: Value = .{ .map = Map(Value).init(alloc) };
return try parser.parse(dupe_behavior);
var splitter = std.mem.splitScalar(u8, contents, ',');
while (splitter.next()) |entry| {
const trimmed = std.mem.trim(u8, entry, " ");
// TODO: consume exactly one space after the colon?
const colon = std.mem.indexOfScalar(u8, trimmed, ':') orelse
return error.BadMapEntry;
try self.putMapKey(
&result.map,
trimmed[0..colon],
try valueFromString(alloc, std.mem.trimLeft(u8, trimmed[colon + 1 .. trimmed.len], " ")),
);
}
return result;
}
inline fn appendListGetValue(list: *List(Value), value: Value) Error!*Value {
@ -925,15 +877,15 @@ pub const Parser = struct {
return &list.items[list.items.len - 1];
}
inline fn putMap(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void {
_ = try putMapGetValue(map, key, value, dupe_behavior);
inline fn putMapKey(self: *Parser, map: *Map(Value), key: []const u8, value: Value) Error!void {
_ = try self.putMapKeyGetValue(map, key, value);
}
inline fn putMapGetValue(map: *Map(Value), key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value {
inline fn putMapKeyGetValue(self: *Parser, map: *Map(Value), key: []const u8, value: Value) Error!*Value {
const gop = try map.getOrPut(key);
if (gop.found_existing)
switch (dupe_behavior) {
switch (self.dupe_behavior) {
.fail => return error.DuplicateKey,
.use_first => {},
.use_last => gop.value_ptr.* = value,
@ -987,306 +939,3 @@ pub const Parser = struct {
});
}
};
/// Character-at-a-time state machine that parses the contents of an inline
/// ("flow") list or map, including nested lists and maps, into a `Value` tree.
///
/// The parser does not consume an opening bracket for the root container: the
/// buffer must start just after it and must still contain the matching closing
/// bracket. `parse` fails with `error.BadState` if the root container is never
/// closed or if any characters follow its closing bracket.
///
/// Usage: `var p = try FlowParser.initList(alloc, buf); defer p.deinit();
/// const value = try p.parse(dupe_behavior);`
pub const FlowParser = struct {
    pub const Value = Parser.Value;

    /// One open container on the parse stack.
    const FlowStackItem = struct {
        /// The list or map currently being filled.
        value: *Value,
        /// Index into `buffer` where the item currently being scanned starts.
        /// Lists need this; maps do also, for keys and values.
        item_start: usize = 0,
    };

    const FlowStack: type = std.ArrayList(FlowStackItem);

    /// The text being parsed. Not copied by the parser itself.
    buffer: []const u8,
    /// The root container. Only valid once `parse` has started.
    root: Value,
    /// Allocator used for the stack and for every value that is created.
    alloc: std.mem.Allocator,
    /// Containers that are currently open, innermost last.
    stack: FlowStack,
    state: ParseState,

    // make this an ugly state machine parser
    const ParseState = enum {
        want_list_item,
        consuming_list_item,
        want_list_separator,
        want_map_key,
        consuming_map_key,
        want_map_value,
        consuming_map_value,
        want_map_separator,
        done,
    };

    const Error = error{
        BadState,
        BadToken,
    } || std.mem.Allocator.Error;

    /// Creates a parser whose root value will be a list.
    ///
    /// The stack starts out as a valid empty list (no allocation happens
    /// here), so `deinit` is safe to call even if `parse` is never called or
    /// fails before it pushes anything.
    pub fn initList(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
        return .{
            .buffer = buffer,
            // assigned at the top of `parse` before anything reads it
            .root = undefined,
            .alloc = alloc,
            .stack = FlowStack.init(alloc),
            .state = .want_list_item,
        };
    }

    /// Creates a parser whose root value will be a map.
    ///
    /// The stack starts out as a valid empty list (no allocation happens
    /// here), so `deinit` is safe to call even if `parse` is never called or
    /// fails before it pushes anything.
    pub fn initMap(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
        return .{
            .buffer = buffer,
            // assigned at the top of `parse` before anything reads it
            .root = undefined,
            .alloc = alloc,
            .stack = FlowStack.init(alloc),
            .state = .want_map_key,
        };
    }

    /// Releases the parse stack. The parsed values themselves are not freed.
    pub fn deinit(self: *FlowParser) void {
        self.stack.deinit();
    }

    /// Returns a pointer to the innermost open container's stack entry, or
    /// `error.BadState` if the stack is empty. The pointer is invalidated by
    /// the next push onto the stack.
    inline fn getStackTip(stack: FlowStack) Error!*FlowStackItem {
        if (stack.items.len == 0) return error.BadState;
        return &stack.items[stack.items.len - 1];
    }

    /// Records `start` as the beginning of the item being scanned in the
    /// innermost open container. Fails with `error.BadState` on an empty stack.
    inline fn setStackItemStart(stack: FlowStack, start: usize) Error!void {
        if (stack.items.len == 0) return error.BadState;
        stack.items[stack.items.len - 1].item_start = start;
    }

    /// Closes the innermost open container at buffer index `idx` (the position
    /// of its closing bracket) and moves the state machine to whatever the
    /// parent container expects next, or to `.done` if the root was closed.
    inline fn popStack(self: *FlowParser, idx: usize) Parser.Error!void {
        const finished = self.stack.popOrNull() orelse return error.BadState;

        if (finished.value.* == .list) {
            // A list may still owe one final item when it is closed:
            //  - in want_list_item, an empty string, but only if the list
            //    already has items or the bracket is past the item start
            //    (so a bare `[]` stays empty);
            //  - in consuming_list_item, the text scanned so far.
            // this is not valid if we are in the want_list_separator state because
            // there is no trailing comma in that state
            if (self.state == .want_list_item and (finished.value.list.items.len > 0 or idx > finished.item_start))
                try finished.value.list.append(
                    try Parser.valueFromString(self.alloc, ""),
                )
            else if (self.state == .consuming_list_item)
                try finished.value.list.append(
                    try Parser.valueFromString(
                        self.alloc,
                        self.buffer[finished.item_start..idx],
                    ),
                );
        }

        const parent = self.stack.getLastOrNull() orelse {
            self.state = .done;
            return;
        };

        switch (parent.value.*) {
            .list => self.state = .want_list_separator,
            .map => self.state = .want_map_separator,
            else => return error.BadState,
        }
    }

    /// Runs the state machine over `buffer` and returns the root value.
    ///
    /// `dupe_behavior` is forwarded to `Parser.putMap`/`Parser.putMapGetValue`
    /// for every map insertion.
    ///
    /// Errors:
    ///  - `error.BadState` if the parser was not freshly created by
    ///    `initList`/`initMap`, if characters remain after the root container
    ///    is closed, or if the buffer ends while a container is still open.
    ///  - `error.BadToken` for a character that is not allowed where it
    ///    appears (see the individual states below).
    ///  - any error produced by allocation or by the `Parser` helpers.
    pub fn parse(self: *FlowParser, dupe_behavior: Parser.DuplicateKeyBehavior) Parser.Error!Value {
        // prime the stack with the root container
        switch (self.state) {
            .want_list_item => self.root = Value.newList(self.alloc),
            .want_map_key => self.root = Value.newMap(self.alloc),
            else => return error.BadState,
        }
        try self.stack.append(.{ .value = &self.root });

        // key that has been read but whose value has not been stored yet
        var dangling_key: ?[]const u8 = null;

        charloop: for (self.buffer, 0..) |char, idx| {
            switch (self.state) {
                .want_list_item => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // empty value
                        const tip = try getStackTip(self.stack);
                        try tip.value.list.append(try Value.fromString(self.alloc, ""));
                        tip.item_start = idx + 1;
                    },
                    '{' => {
                        const tip = try getStackTip(self.stack);

                        const new_map = try Parser.appendListGetValue(
                            &tip.value.list,
                            Value.newMap(self.alloc),
                        );

                        // `tip` must be written before the push below, which
                        // may move the stack's storage
                        tip.item_start = idx;
                        try self.stack.append(.{ .value = new_map });
                        self.state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(self.stack);

                        const new_list = try Parser.appendListGetValue(
                            &tip.value.list,
                            Value.newList(self.alloc),
                        );

                        // `tip` must be written before the push below, which
                        // may move the stack's storage
                        tip.item_start = idx;
                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_list_item;
                    },
                },
                .consuming_list_item => switch (char) {
                    ',' => {
                        const tip = try getStackTip(self.stack);

                        try tip.value.list.append(
                            try Value.fromString(self.alloc, self.buffer[tip.item_start..idx]),
                        );
                        tip.item_start = idx + 1;

                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => continue :charloop,
                },
                .want_list_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .want_list_item;
                    },
                    ']' => try self.popStack(idx),
                    else => return error.BadToken,
                },
                .want_map_key => switch (char) {
                    ' ', '\t' => continue :charloop,
                    // forbid these characters so that flow dictionary keys cannot start
                    // with characters that regular dictionary keys cannot start with
                    // (even though they're unambiguous in this specific context).
                    '{', '[', '#', '>', '|', ',' => return error.BadToken,
                    ':' => {
                        // we have an empty map key
                        dangling_key = "";
                        self.state = .want_map_value;
                    },
                    '}' => try self.popStack(idx),
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_map_key;
                    },
                },
                .consuming_map_key => switch (char) {
                    ':' => {
                        const tip = try getStackTip(self.stack);
                        dangling_key = self.buffer[tip.item_start..idx];

                        self.state = .want_map_value;
                    },
                    else => continue :charloop,
                },
                .want_map_value => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => {
                        // the value is an empty string
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, ""),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        self.state = .want_map_key;
                    },
                    '[' => {
                        const tip = try getStackTip(self.stack);

                        // NOTE(review): if putMapGetValue hands back the
                        // pre-existing entry for a duplicate key (e.g. under
                        // .use_first), the value pushed here may not be a
                        // list — confirm against putMapGetValue.
                        const new_list = try Parser.putMapGetValue(
                            &tip.value.map,
                            dangling_key.?,
                            Value.newList(self.alloc),
                            dupe_behavior,
                        );

                        try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
                        dangling_key = null;
                        self.state = .want_list_item;
                    },
                    '{' => {
                        const tip = try getStackTip(self.stack);

                        // NOTE(review): same duplicate-key caveat as the '['
                        // prong above — confirm against putMapGetValue.
                        const new_map = try Parser.putMapGetValue(
                            &tip.value.map,
                            dangling_key.?,
                            Value.newMap(self.alloc),
                            dupe_behavior,
                        );

                        try self.stack.append(.{ .value = new_map });
                        dangling_key = null;
                        self.state = .want_map_key;
                    },
                    '}' => {
                        // the value is an empty string and this map is closed
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, ""),
                            dupe_behavior,
                        );

                        dangling_key = null;
                        try self.popStack(idx);
                    },
                    else => {
                        try setStackItemStart(self.stack, idx);
                        self.state = .consuming_map_value;
                    },
                },
                .consuming_map_value => switch (char) {
                    ',', '}' => |term| {
                        const tip = try getStackTip(self.stack);
                        try Parser.putMap(
                            &tip.value.map,
                            dangling_key.?,
                            try Parser.valueFromString(self.alloc, self.buffer[tip.item_start..idx]),
                            dupe_behavior,
                        );
                        dangling_key = null;
                        self.state = .want_map_key;
                        // a closing brace both ends the value and closes the map
                        if (term == '}') try self.popStack(idx);
                    },
                    else => continue :charloop,
                },
                .want_map_separator => switch (char) {
                    ' ', '\t' => continue :charloop,
                    ',' => self.state = .want_map_key,
                    '}' => try self.popStack(idx),
                    else => return error.BadToken,
                },
                // the root value was closed but there are characters remaining
                // in the buffer
                .done => return error.BadState,
            }
        }

        // we ran out of characters while still in the middle of an object
        if (self.state != .done) return error.BadState;

        return self.root;
    }
};