This is a simplification, but the main motivation is that the flow parser stack can be integrated with the main parser stack because they are not disparate types any more.
1303 lines
54 KiB
Zig
1303 lines
54 KiB
Zig
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
|
|
//
|
|
// - Doesn't support multiline keys (this means map keys cannot start with
|
|
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
|
|
// - Allows using tabs for indentation (but not mixed tabs/spaces)
|
|
// - Indentation must be quantized consistently throughout the document. e.g.
|
|
// every nested layer being exactly 2 spaces past its parent. Tabs may
|
|
// only use one tab per indentation level.
|
|
// - Allows flow-style lists, maps, and strings on the same line as map keys or
|
|
// list items (i.e. the following are legal):
|
|
//
|
|
// key: {inline: map}
|
|
// key: [inline, list]
|
|
// key: > inline string
|
|
// - {map: item}
|
|
// - [list, item]
|
|
// - > inline string
|
|
//
|
|
// The string case retains the possibility of having an inline map value starting
|
|
// with {, [, or >
|
|
// - inline lists and maps cannot contain other inline structures. This may
|
|
// change, as writing {:[{:[{:[{:[{:[{:[]}]}]}]}]}]} seems tremendously useful
|
|
// - map keys and list item dashes must be followed by a value or an indented
|
|
// section to reduce parser quantum state. This means that
|
|
//
|
|
// foo:
|
|
// bar: baz
|
|
//
|
|
// or
|
|
//
|
|
// -
|
|
// - qux
|
|
//
|
|
// are not valid. This can be represented with an inline empty string after foo:
|
|
//
|
|
// foo: >
|
|
// bar: baz
|
|
//
|
|
// or
|
|
//
|
|
// - >
|
|
// - qux
|
|
//
|
|
// - newlines are strictly LF, if the parser finds CR, it is an error
|
|
// - blank lines may not contain any whitespace characters except the single LF
|
|
// - Additional string indicator `|` for soft-wrapped strings, i.e.
|
|
//
|
|
// key: | this is not special
|
|
// key:
|
|
// | these lines are
|
|
// | soft-wrapped
|
|
//
|
|
// soft-wrapped lines are joined with a ' ' instead of a newline character.
|
|
// Like multiline strings, the final space is stripped (I guess this is a very
|
|
// janky way to add trailing whitespace to a string).
|
|
//
|
|
// - terminated strings to allow trailing whitespace:
|
|
// | this string has trailing whitespace |
|
|
// > and so does this one |
|
|
// - The parser is both strict and probably sloppy and may have weird edge
|
|
// cases since I'm slinging code, not writing a spec. For example, tabs are
|
|
// not trimmed from the values of inline lists/maps
|
|
|
|
const std = @import("std");
|
|
|
|
/// A range expressed as a starting index plus a length, rather than as a
/// pointer-carrying slice.
pub const IndexSlice = struct {
    /// Index of the first element in the range.
    start: usize,
    /// Number of elements in the range.
    len: usize,
};
|
|
|
|
/// A diagnostic record: a row number, a span and a message.
///
/// NOTE(review): in the visible part of this file the record is only
/// initialized and passed around by pointer; nothing writes to it yet.
pub const Diagnostics = struct {
    row: usize,
    span: struct {
        absolute: usize,
        line_offset: usize,
        length: usize,
    },
    message: []const u8,
};
|
|
|
|
/// A growable byte buffer that is fed arbitrary chunks of data and hands back
/// complete, LF-terminated lines one at a time.
///
/// `window` describes the bytes that have been fed but not yet returned by
/// `nextLine`; `used` is the index one past the last fed byte and always
/// equals `window.start + window.len`.
pub const LineBuffer = struct {
    allocator: std.mem.Allocator,
    buffer: []u8,
    used: usize,
    window: IndexSlice,

    pub const default_capacity: usize = 4096;
    pub const Error = std.mem.Allocator.Error;

    /// Create a buffer with `default_capacity` bytes of storage.
    pub fn init(allocator: std.mem.Allocator) Error!LineBuffer {
        return initCapacity(allocator, default_capacity);
    }

    /// Create a buffer with `capacity` bytes of storage. The buffer is owned
    /// by the returned value; release it with `deinit`.
    pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) Error!LineBuffer {
        return .{
            .allocator = allocator,
            .buffer = try allocator.alloc(u8, capacity),
            .used = 0,
            .window = .{ .start = 0, .len = 0 },
        };
    }

    /// Release the backing storage. The buffer, and any line previously
    /// returned by `nextLine`, must not be used afterwards.
    pub fn deinit(self: *LineBuffer) void {
        self.allocator.free(self.buffer);
        self.* = undefined;
    }

    /// Append `data` to the unread window, growing or compacting the backing
    /// storage when required. Slices previously returned by `nextLine` are
    /// invalidated by this call.
    pub fn feed(self: *LineBuffer, data: []const u8) Error!void {
        if (data.len == 0) return;
        // TODO: check for usize overflow here if we want Maximum Robustness
        const new_window_len = self.window.len + data.len;

        if (new_window_len > self.buffer.len) {
            // The unread window plus the new data cannot fit even after
            // compaction, so the storage has to grow. realloc may move the
            // allocation, so the returned slice must be stored.
            // TODO: adopt an overallocation strategy? Will potentially avoid allocating
            // on every invocation but will cause the buffer to oversize
            self.buffer = try self.allocator.realloc(self.buffer, new_window_len);
            self.rehome();
        } else if (self.window.start + new_window_len > self.buffer.len) {
            // The data fits, but only once the unread window has been moved
            // back to the start of the storage.
            self.rehome();
        }

        // In every case the new bytes now fit directly after the last used
        // byte, and both bookkeeping fields must advance with them.
        @memcpy(self.buffer[self.used..][0..data.len], data);
        self.used += data.len;
        self.window.len = new_window_len;
    }

    /// The memory returned by this function is valid until the next call to `feed`.
    /// The resulting slice does not include the newline character.
    ///
    /// Returns null when the unread window holds no complete (LF-terminated)
    /// line; any partial line stays in the window.
    pub fn nextLine(self: *LineBuffer) ?[]const u8 {
        if (self.window.start >= self.buffer.len or self.window.len == 0)
            return null;

        const window = self.buffer[self.window.start..][0..self.window.len];
        const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;

        // consume the line and its newline
        self.window.start += split + 1;
        self.window.len -= split + 1;

        return window[0..split];
    }

    /// Move the unread window to the beginning of the storage.
    fn rehome(self: *LineBuffer) void {
        if (self.window.start == 0) return;

        const window = self.buffer[self.window.start..][0..self.window.len];

        // When the window is longer than the gap in front of it, source and
        // destination overlap and an overlap-tolerant forward copy is needed;
        // otherwise the regions are disjoint and @memcpy is allowed.
        if (self.window.len > self.window.start)
            std.mem.copyForwards(u8, self.buffer, window)
        else
            @memcpy(self.buffer.ptr, window);

        self.window.start = 0;
        self.used = window.len;
    }
};
|
|
|
|
/// A line source over a fixed, caller-owned byte slice. Nothing is copied or
/// allocated; returned lines point into the original data.
pub const FixedLineBuffer = struct {
    buffer: []const u8,
    window: IndexSlice,

    /// Wrap `data`; the unread window initially covers all of it.
    pub fn init(data: []const u8) FixedLineBuffer {
        return FixedLineBuffer{
            .buffer = data,
            .window = IndexSlice{ .start = 0, .len = data.len },
        };
    }

    /// Return the next LF-terminated line without its newline, or null when
    /// the remaining data contains no newline (including when it is empty).
    pub fn nextLine(self: *FixedLineBuffer) ?[]const u8 {
        const exhausted = self.window.len == 0 or self.window.start >= self.buffer.len;
        if (exhausted) return null;

        const remaining = self.buffer[self.window.start..][0..self.window.len];
        if (std.mem.indexOfScalar(u8, remaining, '\n')) |newline_at| {
            // step over the line and the newline that terminates it
            const consumed = newline_at + 1;
            self.window.start += consumed;
            self.window.len -= consumed;
            return remaining[0..newline_at];
        }

        return null;
    }
};
|
|
|
|
/// The indentation style detected for a document.
const IndentationType = union(enum) {
    /// No indentation character has been seen yet.
    immaterial,
    /// The document indents with spaces; the payload is the number of spaces
    /// per indentation level (0 until the first indented line fixes it).
    spaces: usize,
    /// The document indents with tabs.
    tabs,
};
|
|
|
|
/// The value part of a line, classified by its leading character. Every
/// payload is a slice of the line it was found on.
const InlineItem = union(enum) {
    /// Nothing followed the list dash or map key.
    empty,
    /// Plain text with no leading indicator.
    scalar: []const u8,
    /// Text introduced by `>`; successive lines are joined with a newline.
    line_string: []const u8,
    /// Text introduced by `|`; successive lines are joined with a space.
    space_string: []const u8,

    /// Contents of a `[...]` item, after the opening bracket.
    flow_list: []const u8,
    /// Contents of a `{...}` item, after the opening brace.
    flow_map: []const u8,

    /// The character used to join this string line to the following one.
    /// Only meaningful for `.line_string` and `.space_string`; calling it on
    /// any other variant is a programming error.
    fn lineEnding(self: InlineItem) u8 {
        switch (self) {
            .line_string => return '\n',
            .space_string => return ' ',
            else => unreachable,
        }
    }
};
|
|
|
|
/// What a single tokenized line holds once its indentation has been removed.
const LineContents = union(enum) {
    /// Text following the `#` of a comment line.
    comment: []const u8,

    /// A value standing on its own line.
    in_line: InlineItem,
    /// A `-` list entry together with whatever value followed it.
    list_item: InlineItem,
    /// A `key:` map entry together with whatever value followed it.
    map_item: struct {
        key: []const u8,
        val: InlineItem,
    },
};
|
|
|
|
// we can dedent multiple levels at once. Example:
|
|
//
|
|
// foo:
|
|
// bar:
|
|
// > a
|
|
// > string
|
|
// baz: [qux]
|
|
//
|
|
// capturing this is conceptually simple, but implementing it without complex
|
|
// indentation tracking requires quantizing the indentation. This means our
|
|
// IndentationType will also need to track the number of spaces used for
|
|
// indentation, as detected. Then every line we have to check indent rem the
|
|
// quantization level == 0 (otherwise we broke quantization) and compute indent
|
|
// div the quantization level to give us our effective indentation level.
|
|
|
|
/// Direction of an indentation change between two consecutive lines.
const ShiftDirection = enum {
    indent,
    dedent,
    none,
};
|
|
/// Indentation of a line relative to the line before it.
const RelativeIndent = union(ShiftDirection) {
    /// One level deeper than the previous line.
    indent,
    /// Shallower than the previous line, by this many levels.
    dedent: usize,
    /// Same level as the previous line.
    none,
};
|
|
|
|
/// One tokenized line.
const Line = struct {
    /// How this line's indentation relates to the previous line's.
    indent: RelativeIndent,
    /// The classified contents of the line.
    contents: LineContents,
    /// The text of the line with its leading indentation removed.
    raw: []const u8,
};
|
|
|
|
/// Build a tokenizer type that converts the lines produced by `Buffer` into
/// `Line` values.
///
/// `Buffer` must provide a `nextLine` method that returns the next line
/// without its trailing newline, or null when no further line is available.
pub fn LineTokenizer(comptime Buffer: type) type {
    return struct {
        buffer: Buffer,
        index: usize = 0,
        /// Indentation style detected so far.
        indentation: IndentationType = .immaterial,
        /// Indentation level (in quantized units) of the previous line.
        last_indent: usize = 0,
        diagnostics: *Diagnostics,
        /// Number of lines consumed so far, blank lines included.
        row: usize = 0,

        const Error = error{
            BadToken,
            MixedIndentation,
            UnquantizedIndentation,
            TooMuchIndentation,
            MissingNewline,
            TrailingWhitespace,
            Impossible,
        };

        /// Return the next non-blank line, or null once the buffer has no
        /// more lines.
        ///
        /// Errors:
        /// - `MixedIndentation`: tabs and spaces are both used for indentation.
        /// - `UnquantizedIndentation`: a space indent is not a multiple of
        ///   the detected indentation quantum.
        /// - `TooMuchIndentation`: a line is more than one level deeper than
        ///   the previous one.
        /// - `TrailingWhitespace`: a line consists solely of whitespace, or
        ///   a value ends in a space or tab.
        /// - `BadToken`: a CR in the leading whitespace, or a malformed
        ///   indicator / inline structure.
        pub fn next(self: *@This()) Error!?Line {
            lineloop: while (self.buffer.nextLine()) |raw_line| {
                var indent: usize = 0;
                for (raw_line, 0..) |char, idx| {
                    switch (char) {
                        ' ' => {
                            switch (self.indentation) {
                                // There's a weird coupling here because we can't set this until
                                // all spaces have been consumed. I also thought about ignoring
                                // spaces on comment lines since those don't affect the
                                // relative indent/dedent, but then we would allow comments
                                // to ignore our indent quantum, which I dislike due to it making
                                // ugly documents.
                                .immaterial => self.indentation = .{ .spaces = 0 },
                                .spaces => {},
                                .tabs => return error.MixedIndentation,
                            }
                        },
                        '\t' => {
                            switch (self.indentation) {
                                .immaterial => self.indentation = .tabs,
                                .spaces => return error.MixedIndentation,
                                .tabs => {},
                            }
                        },
                        '\r' => {
                            return error.BadToken;
                        },
                        else => {
                            // first non-whitespace character: its index is
                            // the width of the indentation
                            indent = idx;
                            break;
                        },
                    }
                } else {
                    // the loop ran off the end of the line: it is either
                    // empty or made up entirely of whitespace
                    if (raw_line.len > 0) return error.TrailingWhitespace;
                    // blank lines are skipped, but they still occupy a row
                    self.row += 1;
                    continue :lineloop;
                }

                var quantized: usize = if (self.indentation == .spaces) quant: {
                    // the first indented line fixes the quantum
                    if (self.indentation.spaces == 0) {
                        self.indentation.spaces = indent;
                    }
                    // An unindented line is level 0 whatever the quantum is.
                    // Returning early also avoids dividing by a quantum that
                    // is still 0, which can happen if the caller keeps
                    // tokenizing after an error on a whitespace-only line.
                    if (indent == 0) break :quant 0;

                    if (@rem(indent, self.indentation.spaces) != 0)
                        return error.UnquantizedIndentation;

                    break :quant @divExact(indent, self.indentation.spaces);
                } else indent;

                const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
                    if ((quantized - self.last_indent) > 1)
                        return error.TooMuchIndentation;
                    break :rel .indent;
                } else if (quantized < self.last_indent)
                    .{ .dedent = self.last_indent - quantized }
                else
                    .none;

                // Runs when this iteration is left, i.e. after the returned
                // Line has been built, so it observes the final `quantized`.
                defer {
                    self.row += 1;
                    self.last_indent = quantized;
                }

                const line = raw_line[indent..];

                // this should not be possible, as empty lines are caught earlier.
                if (line.len == 0) return error.Impossible;

                switch (line[0]) {
                    '#' => {
                        // simply lie about indentation when the line is a comment.
                        quantized = self.last_indent;
                        return .{
                            .indent = .none,
                            .contents = .{ .comment = line[1..] },
                            .raw = line,
                        };
                    },
                    '|', '>', '[', '{' => {
                        return .{
                            .indent = relative,
                            .contents = .{ .in_line = try detectInlineItem(line) },
                            .raw = line,
                        };
                    },
                    '-' => {
                        // the dash must stand alone or be followed by a space
                        if (line.len > 1 and line[1] != ' ') return error.BadToken;

                        return if (line.len == 1) .{
                            .indent = relative,
                            .contents = .{ .list_item = .empty },
                            .raw = line,
                        } else .{
                            .indent = relative,
                            .contents = .{ .list_item = try detectInlineItem(line[2..]) },
                            .raw = line,
                        };
                    },
                    else => {
                        // a line containing ':' is a map entry split at the
                        // first colon; anything else is a bare scalar
                        for (line, 0..) |char, idx| {
                            if (char == ':') {
                                if (idx + 1 == line.len) return .{
                                    .indent = relative,
                                    .contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
                                    .raw = line,
                                };

                                if (line[idx + 1] != ' ') return error.BadToken;

                                return .{
                                    .indent = relative,
                                    .contents = .{ .map_item = .{
                                        .key = line[0..idx],
                                        .val = try detectInlineItem(line[idx + 2 ..]),
                                    } },
                                    .raw = line,
                                };
                            }
                        }

                        return .{
                            .indent = relative,
                            .contents = .{ .in_line = .{ .scalar = line } },
                            .raw = line,
                        };
                    },
                }

                // somehow everything else has failed
                return error.Impossible;
            }
            return null;
        }

        /// Classify the value portion of a line by its first character.
        /// Returned payloads are slices of `buf`.
        fn detectInlineItem(buf: []const u8) Error!InlineItem {
            if (buf.len == 0) return .empty;

            switch (buf[0]) {
                '>', '|' => |char| {
                    // the indicator must stand alone or be followed by a space
                    if (buf.len > 1 and buf[1] != ' ') return error.BadToken;

                    const slice: []const u8 = switch (buf[buf.len - 1]) {
                        ' ', '\t' => return error.TrailingWhitespace,
                        // a final '|' terminates the string and is dropped so
                        // that whitespace before it is kept; when the lone
                        // indicator is itself that '|' there is nothing to drop
                        '|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
                        else => buf[@min(2, buf.len)..buf.len],
                    };

                    return if (char == '>')
                        .{ .line_string = slice }
                    else
                        .{ .space_string = slice };
                },
                '[' => {
                    if (buf.len < 2 or buf[buf.len - 1] != ']')
                        return error.BadToken;

                    // keep the closing ] for the flow parser
                    return .{ .flow_list = buf[1..] };
                },
                '{' => {
                    if (buf.len < 2 or buf[buf.len - 1] != '}')
                        return error.BadToken;

                    // keep the closing } for the flow parser
                    return .{ .flow_map = buf[1..] };
                },
                else => {
                    if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
                        return error.TrailingWhitespace;

                    return .{ .scalar = buf };
                },
            }
        }
    };
}
|
|
|
|
/// A parsed document node.
///
/// `scalar` and `string` both carry text, and `list`/`flow_list` and
/// `map`/`flow_map` share payload types; the tag records which syntax the
/// value was created from.
pub const Value = union(enum) {
    pub const String = std.ArrayList(u8);
    pub const Map = std.StringArrayHashMap(Value);
    pub const List = std.ArrayList(Value);
    pub const TagType = @typeInfo(Value).Union.tag_type.?;

    scalar: String,
    string: String,
    list: List,
    flow_list: List,
    map: Map,
    flow_map: Map,

    /// Create a `.scalar` value holding a copy of `input`.
    pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .scalar, input);
    }

    /// Create a `.string` value holding a copy of `input`.
    pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
        return try _fromScalarOrString(alloc, .string, input);
    }

    /// Shared implementation of `fromScalar` and `fromString`: copy `input`
    /// into a freshly allocated buffer and wrap it in the requested variant.
    inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
        var text = try String.initCapacity(alloc, input.len);
        text.appendSliceAssumeCapacity(input);
        return @unionInit(Value, @tagName(classification), text);
    }

    /// Create an empty `.scalar` value.
    pub inline fn newScalar(alloc: std.mem.Allocator) Value {
        return Value{ .scalar = String.init(alloc) };
    }

    /// Create an empty `.string` value.
    pub inline fn newString(alloc: std.mem.Allocator) Value {
        return Value{ .string = String.init(alloc) };
    }

    /// Create an empty `.list` value.
    pub inline fn newList(alloc: std.mem.Allocator) Value {
        return Value{ .list = List.init(alloc) };
    }

    /// Create an empty `.flow_list` value.
    pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
        return Value{ .flow_list = List.init(alloc) };
    }

    /// Create an empty `.map` value.
    pub inline fn newMap(alloc: std.mem.Allocator) Value {
        return Value{ .map = Map.init(alloc) };
    }

    /// Create an empty `.flow_map` value.
    pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
        return Value{ .flow_map = Map.init(alloc) };
    }

    /// Write a human-readable rendering of this value, followed by a newline,
    /// using `std.debug.print`.
    pub fn printDebug(self: Value) void {
        self.printRecursive(0);
        std.debug.print("\n", .{});
    }

    /// Render this value with `std.debug.print`; `indent` is the number of
    /// columns used to pad continuation lines and entries.
    fn printRecursive(self: Value, indent: usize) void {
        switch (self) {
            .scalar, .string => |str| {
                const text = str.items;

                // single-line text is printed as-is
                if (std.mem.indexOfScalar(u8, text, '\n') == null) {
                    std.debug.print("{s}", .{text});
                    return;
                }

                // multi-line text starts on a fresh line, each piece padded
                // to the current indent; the last piece gets no newline
                std.debug.print("\n", .{});
                var pieces = std.mem.splitScalar(u8, text, '\n');
                while (pieces.next()) |piece| {
                    const terminator: []const u8 = if (pieces.peek() == null) "" else "\n";
                    std.debug.print(
                        "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                        .{
                            .empty = "",
                            .indent = indent,
                            .line = piece,
                            .nl = terminator,
                        },
                    );
                }
            },
            .list, .flow_list => |list| {
                if (list.items.len == 0) {
                    std.debug.print("[]", .{});
                    return;
                }

                std.debug.print("[\n", .{});
                for (list.items, 0..) |element, position| {
                    std.debug.print(
                        "{[empty]s: >[indent]}[{[idx]d}] = ",
                        .{ .empty = "", .indent = indent, .idx = position },
                    );
                    element.printRecursive(indent + 2);
                    std.debug.print(",\n", .{});
                }
                std.debug.print("{[empty]s: >[indent]}]", .{ .empty = "", .indent = indent });
            },
            .map, .flow_map => |map| {
                if (map.count() == 0) {
                    std.debug.print("{{}}", .{});
                    return;
                }

                std.debug.print("{{\n", .{});

                var entries = map.iterator();
                while (entries.next()) |entry| {
                    const key = entry.key_ptr.*;
                    std.debug.print(
                        "{[empty]s: >[indent]}{[key]s}: ",
                        .{ .empty = "", .indent = indent + 2, .key = key },
                    );
                    entry.value_ptr.printRecursive(indent + 4);
                    std.debug.print(",\n", .{});
                }
                std.debug.print("{[empty]s: >[indent]}}}", .{ .empty = "", .indent = indent });
            },
        }
    }
};
|
|
|
|
pub const Parser = struct {
|
|
allocator: std.mem.Allocator,
|
|
dupe_behavior: DuplicateKeyBehavior = .fail,
|
|
default_object: DefaultObject = .fail,
|
|
diagnostics: Diagnostics = .{
|
|
.row = 0,
|
|
.span = .{ .absolute = 0, .line_offset = 0, .length = 0 },
|
|
.message = "all is well",
|
|
},
|
|
|
|
/// Every error the parser can return: its own structural errors, the
/// tokenizer's errors, and allocation failure.
pub const Error = error{
    UnexpectedIndent,
    UnexpectedValue,
    ExtraContent,
    EmptyDocument,
    DuplicateKey,
    BadMapEntry,
    BadState,
    BadToken,
    Fail,
} ||
    LineTokenizer(FixedLineBuffer).Error ||
    std.mem.Allocator.Error;
|
|
|
|
/// Policy for a map key that appears more than once.
///
/// NOTE(review): the code that applies this policy is outside the visible
/// part of the file; the per-variant notes are inferred from the names.
pub const DuplicateKeyBehavior = enum {
    /// presumably keeps the value from the first occurrence — TODO confirm
    use_first,
    /// presumably keeps the value from the last occurrence — TODO confirm
    use_last,
    /// presumably rejects the document — TODO confirm
    fail,
};
|
|
|
|
/// What the document root becomes when the input contains no content lines.
pub const DefaultObject = enum {
    /// The root is an empty string.
    string,
    /// The root is an empty list.
    list,
    /// The root is an empty map.
    map,
    /// Parsing fails with `error.EmptyDocument`.
    fail,
};
|
|
|
|
/// Top-level state of the line parser.
pub const ParseState = enum {
    /// No content line has been consumed yet.
    initial,
    /// A root value exists and further lines are folded into it.
    value,
    /// The root is complete; any further content line is an error.
    done,
};
|
|
|
|
/// A parsed document: the root value plus the arena that owns every
/// allocation made while building it.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Create a document whose arena is backed by `alloc`. `root` is left
    /// undefined and must be assigned before it is read.
    pub fn init(alloc: std.mem.Allocator) Document {
        const arena = std.heap.ArenaAllocator.init(alloc);
        return Document{ .arena = arena, .root = undefined };
    }

    /// Print the root value with `std.debug.print`.
    pub fn printDebug(self: Document) void {
        self.root.printDebug();
    }

    /// Free everything the document's arena allocated.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};
|
|
|
|
/// Bundles the mutable state of a parse: the document being built, the stack
/// of open container values, and the flags carried from line to line.
///
/// NOTE(review): nothing in the visible part of the file uses this struct;
/// `parseBuffer` keeps the equivalent state in local variables.
pub const State = struct {
    pub const Stack = std.ArrayList(*Value);

    document: Document,
    value_stack: Stack,
    state: ParseState = .initial,
    expect_shift: ShiftDirection = .none,
    dangling_key: ?[]const u8 = null,

    /// Create a fresh state. Both the document's arena and the value stack
    /// are backed by `alloc`.
    pub fn init(alloc: std.mem.Allocator) State {
        const document = Document.init(alloc);
        const value_stack = Stack.init(alloc);
        return State{ .document = document, .value_stack = value_stack };
    }

    /// Free the value stack. The document is not freed here.
    pub fn deinit(self: State) void {
        self.value_stack.deinit();
    }
};
|
|
|
|
/// Parse a complete in-memory document.
///
/// Every allocation is made from the returned document's arena, so the caller
/// releases everything with `Document.deinit`. If parsing fails, the partially
/// built document is freed before the error is returned.
///
/// Errors (besides tokenizer and allocation errors):
/// - `EmptyDocument`: the input has no content lines and `default_object` is `.fail`.
/// - `ExtraContent`: content follows a root value that is complete after one line.
/// - `UnexpectedIndent` / `UnexpectedValue`: a line does not fit the structure
///   being built.
/// - `Fail`: an empty map value was detected without a pending key.
pub fn parseBuffer(self: *Parser, buffer: []const u8) Error!Document {
    var document = Document.init(self.allocator);
    errdefer document.deinit();
    const arena_alloc = document.arena.allocator();

    var state: ParseState = .initial;
    // the indentation change the previously consumed line obliges the next
    // line to have (or allows it to have)
    var expect_shift: ShiftDirection = .none;
    // a map key seen on its own line, waiting for its value
    var dangling_key: ?[]const u8 = null;
    // the containers (and multi-line strings) currently open, innermost last
    var stack = std.ArrayList(*Value).init(arena_alloc);
    defer stack.deinit();

    var tok: LineTokenizer(FixedLineBuffer) = .{
        .buffer = FixedLineBuffer.init(buffer),
        .diagnostics = &self.diagnostics,
    };

    while (try tok.next()) |line| {
        if (line.contents == .comment) continue;

        var flip = true;
        var flop = false;
        // this is needed to give us a second go round when the line is dedented
        // (or when it opens a nested container): `continue :flipflop` re-runs
        // the body for the same line with `flop` set to true.
        flipflop: while (flip) : (flop = true) {
            switch (state) {
                .initial => {
                    if (line.indent == .indent) return error.UnexpectedIndent;

                    switch (line.contents) {
                        // we filter out comments above
                        .comment => unreachable,
                        .in_line => |in_line| switch (in_line) {
                            // empty scalars are only emitted for a list_item or a map_item
                            .empty => unreachable,
                            .scalar => |str| {
                                // A bare scalar is the entire document. The
                                // parser goes straight to .done, and the
                                // finalizer at the end of this function trims
                                // nothing in that state, so the text is
                                // stored exactly as written.
                                document.root = try Value.fromScalar(arena_alloc, str);
                                state = .done;
                            },
                            .line_string, .space_string => |str| {
                                document.root = try Value.fromString(arena_alloc, str);
                                try document.root.string.append(in_line.lineEnding());
                                try stack.append(&document.root);
                                state = .value;
                            },
                            .flow_list => |str| {
                                document.root = try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior);
                                state = .done;
                            },
                            .flow_map => |str| {
                                document.root = try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior);
                                state = .done;
                            },
                        },
                        .list_item => |value| {
                            document.root = Value.newList(arena_alloc);
                            try stack.append(&document.root);
                            state = .value;

                            switch (value) {
                                .empty => expect_shift = .indent,
                                .scalar => |str| try document.root.list.append(try Value.fromScalar(arena_alloc, str)),
                                .line_string, .space_string => |str| try document.root.list.append(try Value.fromString(arena_alloc, str)),
                                .flow_list => |str| try document.root.list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                .flow_map => |str| try document.root.list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                            }
                        },
                        .map_item => |pair| {
                            document.root = Value.newMap(arena_alloc);
                            try stack.append(&document.root);
                            state = .value;

                            // the key is a slice of the input buffer; copy it
                            // so the document does not depend on the input
                            const dupekey = try arena_alloc.dupe(u8, pair.key);
                            switch (pair.val) {
                                .empty => {
                                    expect_shift = .indent;
                                    // If the key is on its own line, we don't have
                                    // an associated value until we parse the next
                                    // line. We need to store a reference to this
                                    // key somewhere until we can consume the
                                    // value. More parser state to lug along.

                                    dangling_key = dupekey;
                                },
                                .scalar => |str| try document.root.map.put(dupekey, try Value.fromScalar(arena_alloc, str)),
                                .line_string, .space_string => |str| try document.root.map.put(dupekey, try Value.fromString(arena_alloc, str)),
                                .flow_list => |str| try document.root.map.put(dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                .flow_map => |str| try document.root.map.put(dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                            }
                        },
                    }
                },
                .value => switch (stack.getLast().*) {
                    // these three states are never reachable here. flow_list and
                    // flow_map are parsed with a separate state machine. These
                    // value types can only be present by themselves as the first
                    // line of the document, in which case the document consists
                    // only of that single line: this parser jumps immediately into
                    // the .done state, bypassing the .value state in which this
                    // switch is embedded.
                    .scalar, .flow_list, .flow_map => unreachable,
                    .string => |*string| {
                        if (line.indent == .indent)
                            return error.UnexpectedIndent;

                        if (!flop and line.indent == .dedent) {
                            // kick off the last trailing space or newline
                            _ = string.pop();

                            var dedent_depth = line.indent.dedent;
                            while (dedent_depth > 0) : (dedent_depth -= 1)
                                _ = stack.pop();

                            continue :flipflop;
                        }

                        switch (line.contents) {
                            .comment => unreachable,
                            .in_line => |in_line| switch (in_line) {
                                .empty => unreachable,
                                .line_string, .space_string => |str| {
                                    try string.appendSlice(str);
                                    try string.append(in_line.lineEnding());
                                },
                                else => return error.UnexpectedValue,
                            },
                            else => return error.UnexpectedValue,
                        }
                    },
                    .list => |*list| {
                        // detect that the previous item was actually empty
                        //
                        //    -
                        //    - something
                        //
                        // the first line here creates the expect_shift, but the second line
                        // is a valid continuation of the list despite not being indented
                        if (!flop and (expect_shift == .indent and line.indent != .indent))
                            try list.append(Value.newScalar(arena_alloc));

                        // Consider:
                        //
                        //    -
                        //      own-line scalar
                        //    - inline scalar
                        //
                        // the own-line scalar will not push the stack but the next list item will be a dedent
                        if (!flop and line.indent == .dedent) {
                            // if line.indent.dedent is 1 and we're expecting it, the stack will not be popped,
                            // but we will continue loop flipflop. However, flop will be set to true on the next
                            // trip, so this if prong will not be run again.
                            var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);

                            while (dedent_depth > 0) : (dedent_depth -= 1)
                                _ = stack.pop();

                            continue :flipflop;
                        }

                        switch (line.contents) {
                            .comment => unreachable,
                            .in_line => |in_line| {
                                // assert that this line has been indented. this is required for an inline value when
                                // the stack is in list mode.
                                if (expect_shift != .indent or line.indent != .indent)
                                    return error.UnexpectedValue;

                                expect_shift = .dedent;
                                switch (in_line) {
                                    .empty => unreachable,
                                    .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
                                    .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                    .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                    .line_string, .space_string => |str| {
                                        // string pushes the stack
                                        const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str));
                                        try stack.append(new_string);

                                        try new_string.string.append(in_line.lineEnding());
                                        expect_shift = .none;
                                    },
                                }
                            },
                            .list_item => |value| {
                                if (flop or (line.indent == .none or line.indent == .dedent)) {
                                    expect_shift = .none;
                                    switch (value) {
                                        .empty => expect_shift = .indent,
                                        .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
                                        .line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
                                        .flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
                                        .flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
                                    }
                                } else if (line.indent == .indent) {
                                    if (expect_shift != .indent) return error.UnexpectedIndent;

                                    // an indented list item opens a nested list;
                                    // the item itself is handled on the second trip
                                    const new_list = try appendListGetValue(list, Value.newList(arena_alloc));
                                    try stack.append(new_list);
                                    expect_shift = .none;
                                    continue :flipflop;
                                } else unreachable;
                            },
                            .map_item => {
                                // this prong cannot be hit on dedent in a valid way.
                                //
                                //    -
                                //      map: value
                                //      second: value
                                //    third: value
                                //
                                // dedenting back to the list stack level requires list_item
                                //
                                // The nested map is only legal directly after a
                                // dash that stands on its own line, which is what
                                // sets expect_shift to .indent. Without that check
                                // an indented map after an item that already has
                                // an inline value would be silently accepted as an
                                // additional list entry.
                                if (expect_shift != .indent or line.indent != .indent)
                                    return error.UnexpectedValue;

                                const new_map = try appendListGetValue(list, Value.newMap(arena_alloc));
                                try stack.append(new_map);
                                expect_shift = .none;
                                continue :flipflop;
                            },
                        }
                    },
                    .map => |*map| {
                        // detect that the previous item was actually empty
                        //
                        //    foo:
                        //    bar: baz
                        //
                        // the first line here creates the expect_shift, but the second line
                        // is a valid continuation of the map despite not being indented
                        if (!flop and (expect_shift == .indent and line.indent != .indent)) {
                            try putMap(
                                map,
                                dangling_key orelse return error.Fail,
                                Value.newScalar(arena_alloc),
                                self.dupe_behavior,
                            );
                            dangling_key = null;
                        }

                        if (!flop and line.indent == .dedent) {
                            // an own-line value does not push the stack, so one
                            // level of the dedent is already accounted for
                            var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);

                            while (dedent_depth > 0) : (dedent_depth -= 1)
                                _ = stack.pop();

                            continue :flipflop;
                        }

                        switch (line.contents) {
                            .comment => unreachable,
                            .in_line => |in_line| {
                                // assert that this line has been indented. this is required for an inline value when
                                // the stack is in map mode.
                                if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
                                    return error.UnexpectedValue;

                                expect_shift = .dedent;

                                switch (in_line) {
                                    .empty => unreachable,
                                    .scalar => |str| try putMap(map, dangling_key.?, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
                                    .flow_list => |str| try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
                                    .flow_map => |str| {
                                        try putMap(map, dangling_key.?, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior);
                                    },
                                    .line_string, .space_string => |str| {
                                        // string pushes the stack
                                        const new_string = try putMapGetValue(map, dangling_key.?, try Value.fromString(arena_alloc, str), self.dupe_behavior);
                                        try new_string.string.append(in_line.lineEnding());
                                        try stack.append(new_string);
                                        expect_shift = .none;
                                    },
                                }

                                dangling_key = null;
                            },
                            .list_item => {
                                // this prong cannot be hit on dedent in a valid way.
                                //
                                //    map:
                                //      - value
                                //    - invalid
                                //
                                // dedenting back to the map stack level requires map_item

                                if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
                                    return error.UnexpectedValue;

                                const new_list = try putMapGetValue(map, dangling_key.?, Value.newList(arena_alloc), self.dupe_behavior);
                                try stack.append(new_list);
                                dangling_key = null;
                                expect_shift = .none;
                                continue :flipflop;
                            },
                            .map_item => |pair| {
                                if (flop or (line.indent == .none or line.indent == .dedent)) {
                                    expect_shift = .none;
                                    const dupekey = try arena_alloc.dupe(u8, pair.key);
                                    switch (pair.val) {
                                        .empty => {
                                            expect_shift = .indent;
                                            dangling_key = dupekey;
                                        },
                                        .scalar => |str| try putMap(map, dupekey, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
                                        .line_string, .space_string => |str| try putMap(map, dupekey, try Value.fromString(arena_alloc, str), self.dupe_behavior),
                                        .flow_list => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
                                        .flow_map => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior),
                                    }
                                } else if (line.indent == .indent) {
                                    if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;

                                    // an indented map item opens a nested map under
                                    // the pending key; the item itself is handled
                                    // on the second trip
                                    const new_map = try putMapGetValue(map, dangling_key.?, Value.newMap(arena_alloc), self.dupe_behavior);
                                    try stack.append(new_map);
                                    dangling_key = null;
                                    continue :flipflop;
                                } else unreachable;
                            },
                        }
                    },
                },
                .done => return error.ExtraContent,
            }

            // this is specifically performed at the end of the loop body so that
            // `continue :flipflop` skips setting it.
            flip = false;
        }
    }

    // finalize whatever was still open when the input ended
    switch (state) {
        .initial => switch (self.default_object) {
            .string => document.root = Value.newString(arena_alloc),
            .list => document.root = Value.newList(arena_alloc),
            .map => document.root = Value.newMap(arena_alloc),
            .fail => return error.EmptyDocument,
        },
        .value => switch (stack.getLast().*) {
            // remove the final trailing newline or space
            .scalar, .string => |*string| _ = string.popOrNull(),
            // if we have a dangling -, attach an empty string to it
            .list => |*list| if (expect_shift == .indent) try list.append(Value.newScalar(arena_alloc)),
            // if we have a dangling "key:", attach an empty string to it
            .map => |*map| if (dangling_key) |dk| try putMap(map, dk, Value.newScalar(arena_alloc), self.dupe_behavior),
            .flow_list, .flow_map => {},
        },
        .done => {},
    }

    return document;
}
|
|
|
|
/// Stack of the flow containers that are currently open, innermost last.
const FlowStack = std.ArrayList(*Value);
|
|
|
|
/// Return the innermost open value, or `error.BadState` if the stack is empty.
inline fn getStackTip(stack: FlowStack) Error!*Value {
    if (stack.getLastOrNull()) |tip| return tip;
    return error.BadState;
}
|
|
|
|
/// Close the innermost open value and report the state the flow parser should
/// continue in.
///
/// Returns `.done` when nothing remains open, and otherwise the
/// separator-expecting state that matches the new innermost container.
/// Fails with `error.BadState` if the stack was already empty or the new
/// innermost value is not a flow list or flow map.
inline fn popStack(stack: *FlowStack) Error!FlowParseState {
    _ = stack.popOrNull() orelse return error.BadState;

    if (stack.getLastOrNull()) |parent| {
        switch (parent.*) {
            .flow_list => return .want_list_separator,
            .flow_map => return .want_map_separator,
            else => return error.BadState,
        }
    }

    return .done;
}
|
|
|
|
/// States of the character-level parser for flow (`[...]` / `{...}`) values.
const FlowParseState = enum {
    // list states
    want_list_item,
    consuming_list_item,
    want_list_separator,

    // map states
    want_map_key,
    consuming_map_key,
    want_map_value,
    consuming_map_value,
    want_map_separator,

    // the outermost container has been closed
    done,
};
|
|
|
|
/// Parse the body of a flow-style (inline) list or map into a `Value`.
///
/// The root container is created up front from `root_type` and parsing starts
/// *inside* it, so `contents` must not include the root's opening bracket but
/// must include its closing one. `contents` is scanned one byte at a time by
/// the `FlowParseState` state machine; nested containers are tracked on a
/// stack of pointers to the value currently being filled.
///
/// Parameters:
///   - `alloc`: allocator used for every produced value and for duplicated
///     map keys.
///   - `contents`: the flow text, starting just after the root's opener.
///   - `root_type`: must be `.flow_list` or `.flow_map`.
///   - `dupe_behavior`: what to do when a map key appears more than once.
///
/// Errors:
///   - `error.BadState`: `root_type` is not a flow container, the input ends
///     before the root is closed, or characters follow the root's closer.
///   - `error.BadToken`: an unexpected character where a separator or a map
///     key was required.
///   - `error.DuplicateKey`: a repeated key with `dupe_behavior == .fail`.
///   - allocation failures are propagated.
pub fn parseFlow(
    alloc: std.mem.Allocator,
    contents: []const u8,
    root_type: Value.TagType,
    dupe_behavior: DuplicateKeyBehavior,
) Error!Value {
    var root: Value = switch (root_type) {
        .flow_list => Value.newFlowList(alloc),
        .flow_map => Value.newFlowMap(alloc),
        else => return error.BadState,
    };
    var state: FlowParseState = switch (root_type) {
        .flow_list => .want_list_item,
        .flow_map => .want_map_key,
        // any other tag already returned above
        else => unreachable,
    };
    var stack = try FlowStack.initCapacity(alloc, 1);
    // the stack only holds borrowed pointers; release its own storage on
    // every exit path so non-arena allocators do not leak it
    defer stack.deinit();
    stack.appendAssumeCapacity(&root);
    // used to distinguish between [] and [ ], and it also tracks
    // a continuous value between different states
    var item_start: usize = 0;
    var dangling_key: ?[]const u8 = null;

    charloop: for (contents, 0..) |char, idx| {
        switch (state) {
            .want_list_item => switch (char) {
                ' ', '\t' => continue :charloop,
                ',' => {
                    // empty value
                    const tip = try getStackTip(stack);
                    try tip.flow_list.append(Value.newScalar(alloc));
                    item_start = idx + 1;
                },
                '{' => {
                    const tip = try getStackTip(stack);

                    const new_map = try Parser.appendListGetValue(
                        &tip.flow_list,
                        Value.newFlowMap(alloc),
                    );

                    item_start = idx;
                    try stack.append(new_map);
                    state = .want_map_key;
                },
                '[' => {
                    const tip = try getStackTip(stack);

                    const new_list = try Parser.appendListGetValue(
                        &tip.flow_list,
                        Value.newFlowList(alloc),
                    );

                    item_start = idx + 1;
                    try stack.append(new_list);
                    state = .want_list_item;
                },
                ']' => {
                    const finished = try getStackTip(stack);
                    // `[]` is an empty list, but a closer that follows a
                    // separator or any whitespace terminates an empty item
                    if (finished.flow_list.items.len > 0 or idx > item_start)
                        try finished.flow_list.append(Value.newScalar(alloc));
                    state = try popStack(&stack);
                },
                else => {
                    item_start = idx;
                    state = .consuming_list_item;
                },
            },
            .consuming_list_item => switch (char) {
                ',' => {
                    const tip = try getStackTip(stack);

                    try tip.flow_list.append(
                        try Value.fromScalar(alloc, contents[item_start..idx]),
                    );
                    item_start = idx + 1;

                    state = .want_list_item;
                },
                ']' => {
                    const finished = try getStackTip(stack);
                    try finished.flow_list.append(
                        try Value.fromScalar(alloc, contents[item_start..idx]),
                    );
                    state = try popStack(&stack);
                },
                else => continue :charloop,
            },
            .want_list_separator => switch (char) {
                ' ', '\t' => continue :charloop,
                ',' => {
                    item_start = idx;
                    state = .want_list_item;
                },
                ']' => state = try popStack(&stack),
                else => return error.BadToken,
            },
            .want_map_key => switch (char) {
                ' ', '\t' => continue :charloop,
                // forbid these characters so that flow dictionary keys cannot start
                // with characters that regular dictionary keys cannot start with
                // (even though they're unambiguous in this specific context).
                '{', '[', '#', '-', '>', '|', ',' => return error.BadToken,
                ':' => {
                    // we have an empty map key
                    dangling_key = "";
                    state = .want_map_value;
                },
                '}' => state = try popStack(&stack),
                else => {
                    item_start = idx;
                    state = .consuming_map_key;
                },
            },
            .consuming_map_key => switch (char) {
                ':' => {
                    dangling_key = try alloc.dupe(u8, contents[item_start..idx]);
                    state = .want_map_value;
                },
                else => continue :charloop,
            },
            .want_map_value => switch (char) {
                ' ', '\t' => continue :charloop,
                ',' => {
                    // the value is an empty string
                    const tip = try getStackTip(stack);
                    try Parser.putMap(
                        &tip.flow_map,
                        dangling_key.?,
                        Value.newScalar(alloc),
                        dupe_behavior,
                    );

                    dangling_key = null;
                    state = .want_map_key;
                },
                '[' => {
                    const tip = try getStackTip(stack);

                    const new_list = try Parser.putFlowContainer(
                        alloc,
                        &tip.flow_map,
                        dangling_key.?,
                        Value.newFlowList(alloc),
                        dupe_behavior,
                    );

                    try stack.append(new_list);
                    dangling_key = null;
                    item_start = idx + 1;
                    state = .want_list_item;
                },
                '{' => {
                    const tip = try getStackTip(stack);

                    const new_map = try Parser.putFlowContainer(
                        alloc,
                        &tip.flow_map,
                        dangling_key.?,
                        Value.newFlowMap(alloc),
                        dupe_behavior,
                    );

                    try stack.append(new_map);
                    dangling_key = null;
                    state = .want_map_key;
                },
                '}' => {
                    // the value is an empty string and this map is closed
                    const tip = try getStackTip(stack);
                    try Parser.putMap(
                        &tip.flow_map,
                        dangling_key.?,
                        Value.newScalar(alloc),
                        dupe_behavior,
                    );

                    dangling_key = null;
                    state = try popStack(&stack);
                },
                else => {
                    item_start = idx;
                    state = .consuming_map_value;
                },
            },
            .consuming_map_value => switch (char) {
                ',', '}' => |term| {
                    const tip = try getStackTip(stack);
                    try Parser.putMap(
                        &tip.flow_map,
                        dangling_key.?,
                        try Value.fromScalar(alloc, contents[item_start..idx]),
                        dupe_behavior,
                    );
                    dangling_key = null;
                    state = .want_map_key;
                    if (term == '}') state = try popStack(&stack);
                },
                else => continue :charloop,
            },
            .want_map_separator => switch (char) {
                ' ', '\t' => continue :charloop,
                ',' => state = .want_map_key,
                '}' => state = try popStack(&stack),
                else => return error.BadToken,
            },
            // the root value was closed but there are characters remaining
            // in the buffer
            .done => return error.BadState,
        }
    }
    // we ran out of characters while still in the middle of an object
    if (state != .done) return error.BadState;

    return root;
}

/// Store a freshly opened flow container under `key` in `map` and return the
/// location the parser should fill in while the container is open.
///
/// This differs from a plain insert when the key already exists and
/// `dupe_behavior` is `.use_first`: the existing entry must be left untouched,
/// so the duplicate is parsed into a detached `Value` instead of into the
/// map. Handing back the existing entry would make the parser write list or
/// map items into whatever value the first occurrence holds.
///
/// The detached value is allocated from `alloc` and is never attached to the
/// result, so it is only reclaimed when `alloc` is released wholesale.
fn putFlowContainer(
    alloc: std.mem.Allocator,
    map: *Value.Map,
    key: []const u8,
    container: Value,
    dupe_behavior: DuplicateKeyBehavior,
) Error!*Value {
    const gop = try map.getOrPut(key);

    if (gop.found_existing) {
        switch (dupe_behavior) {
            .fail => return error.DuplicateKey,
            .use_first => {
                const discarded = try alloc.create(Value);
                discarded.* = container;
                return discarded;
            },
            // fall through and overwrite the existing entry
            .use_last => {},
        }
    }

    gop.value_ptr.* = container;
    return gop.value_ptr;
}
|
|
|
|
/// Append `value` to `list` and return a pointer to the stored element.
///
/// The pointer refers to the list's backing storage, so it is only valid
/// until the list is next resized.
inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value {
    try list.append(value);
    const stored = list.items;
    return &stored[stored.len - 1];
}
|
|
|
|
/// Insert `value` under `key` following `dupe_behavior`, ignoring the
/// resulting slot pointer. See `putMapGetValue` for the duplicate-key rules.
inline fn putMap(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void {
    const slot = try putMapGetValue(map, key, value, dupe_behavior);
    _ = slot;
}
|
|
|
|
/// Insert `value` under `key` and return a pointer to the map's slot for that
/// key.
///
/// When the key is already present, `dupe_behavior` decides the outcome:
///   - `.fail`: return `error.DuplicateKey`.
///   - `.use_first`: keep the existing value; the returned pointer refers to
///     that existing value, not to `value`.
///   - `.use_last`: overwrite the existing value with `value`.
inline fn putMapGetValue(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value {
    const entry = try map.getOrPut(key);

    // fresh key: simply fill the new slot
    if (!entry.found_existing) {
        entry.value_ptr.* = value;
        return entry.value_ptr;
    }

    switch (dupe_behavior) {
        .fail => return error.DuplicateKey,
        .use_first => {},
        .use_last => entry.value_ptr.* = value,
    }

    return entry.value_ptr;
}
|
|
|
|
/// Tokenize `buf` line by line and print a debug description of every line
/// via `dumpLine`. Tokenizer errors are propagated; the tokenizer reports
/// into this parser's `diagnostics`.
pub fn dumpBufLines(self: *Parser, buf: []const u8) Error!void {
    var tokenizer: LineTokenizer = .{ .buffer = buf, .diagnostics = &self.diagnostics };
    while (true) {
        const line = (try tokenizer.next()) orelse break;
        dumpLine(line);
    }
}
|
|
|
|
/// Print a one-line debug description of a tokenized line to stderr.
///
/// The output has the form
/// `<indent>[ (<dedent count>)]: <contents tag> => <payload>` followed by a
/// newline, where the payload depends on the kind of line:
///   - comment: the comment text
///   - in-line / list item: `[empty]` or `<value tag> => <text>`
///   - map item: `<key> : ` followed by `[empty]` or `<value tag> => <text>`
///
/// The pieces are written straight to stderr rather than being assembled in
/// fixed-size scratch buffers, so arbitrarily long lines cannot overflow a
/// buffer. The description is emitted with several `std.debug.print` calls.
fn dumpLine(line: LineTokenizer.Line) void {
    std.debug.print("{s}", .{@tagName(line.indent)});
    // only dedents carry a count worth showing
    if (line.indent == .dedent)
        std.debug.print(" ({d})", .{line.indent.dedent});
    std.debug.print(": {s} => ", .{@tagName(line.contents)});

    switch (line.contents) {
        .comment => |str| std.debug.print("{s}", .{str}),
        .in_line, .list_item => |scalar| switch (scalar) {
            .empty => std.debug.print("[empty]", .{}),
            .scalar,
            .string,
            .flow_list,
            .flow_map,
            => |str| std.debug.print("{s} => {s}", .{ @tagName(scalar), str }),
        },
        .map_item => |map| switch (map.val) {
            .empty => std.debug.print("{s} : [empty]", .{map.key}),
            .scalar,
            .string,
            .flow_list,
            .flow_map,
            => |str| std.debug.print("{s} : {s} => {s}", .{ map.key, @tagName(map.val), str }),
        },
    }

    std.debug.print("\n", .{});
}
|
|
};
|