2023-09-13 00:11:45 -07:00
|
|
|
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
|
|
|
|
//
|
|
|
|
// - Doesn't support multiline keys (this means map keys cannot start with
|
2023-09-17 19:28:07 -07:00
|
|
|
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
|
2023-09-13 00:11:45 -07:00
|
|
|
// - Allows using tabs for indentation (but not mixed tabs/spaces)
|
|
|
|
// - Indentation must be quantized consistently throughout the document. e.g.
|
|
|
|
// every nested layer being exactly 2 spaces past its parent. Tabs may
|
|
|
|
// only use one tab per indentation level.
|
|
|
|
// - Allows flow-style lists, maps, and strings on the same line as map keys or
|
|
|
|
// list items (i.e. the following are legal):
|
|
|
|
//
|
|
|
|
// key: {inline: map}
|
|
|
|
// key: [inline, list]
|
|
|
|
// key: > inline string
|
|
|
|
// - {map: item}
|
|
|
|
// - [list, item]
|
|
|
|
// - > inline string
|
|
|
|
//
|
|
|
|
// The string case retains the possibility of having an inline map value starting
|
|
|
|
// with {, [, or >
|
2023-09-14 23:38:24 -07:00
|
|
|
// - inline lists and maps cannot contain other inline structures. This may
|
|
|
|
// change, as writing {:[{:[{:[{:[{:[{:[]}]}]}]}]}]} seems tremendously useful
|
2023-09-13 00:11:45 -07:00
|
|
|
// - map keys and list item dashes must be followed by a value or an indented
|
|
|
|
// section to reduce parser quantum state. This means that
|
|
|
|
//
|
|
|
|
// foo:
|
|
|
|
// bar: baz
|
|
|
|
//
|
2023-09-14 23:38:24 -07:00
|
|
|
// or
|
2023-09-13 00:11:45 -07:00
|
|
|
//
|
|
|
|
// -
|
|
|
|
// - qux
|
|
|
|
//
|
|
|
|
// are not valid. This can be represented with an inline empty string after foo:
|
|
|
|
//
|
|
|
|
// foo: >
|
|
|
|
// bar: baz
|
|
|
|
//
|
|
|
|
// or
|
|
|
|
//
|
|
|
|
// - >
|
|
|
|
// - qux
|
|
|
|
//
|
|
|
|
// - newlines are strictly LF; if the parser finds a CR, it is an error
|
|
|
|
// - blank lines may not contain any whitespace characters except the single LF
|
|
|
|
// - Additional string indicator `|` for soft-wrapped strings, i.e.
|
|
|
|
//
|
|
|
|
// key: | this is not special
|
|
|
|
// key:
|
|
|
|
// | these lines are
|
|
|
|
// | soft-wrapped
|
2023-09-14 23:38:24 -07:00
|
|
|
//
|
2023-09-13 00:11:45 -07:00
|
|
|
// soft-wrapped lines are joined with a ' ' instead of a newline character.
|
|
|
|
// Like multiline strings, the final space is stripped (I guess this is a very
|
|
|
|
// janky way to add trailing whitespace to a string).
|
|
|
|
//
|
2023-09-17 23:09:26 -07:00
|
|
|
// - terminated strings to allow trailing whitespace:
|
|
|
|
// | this string has trailing whitespace |
|
|
|
|
// > and so does this one |
|
2023-09-13 00:11:45 -07:00
|
|
|
// - The parser is both strict and probably sloppy and may have weird edge
|
2023-09-14 23:38:24 -07:00
|
|
|
// cases since I'm slinging code, not writing a spec. For example, tabs are
|
|
|
|
// not trimmed from the values of inline lists/maps
|
2023-09-13 00:11:45 -07:00
|
|
|
|
|
|
|
const std = @import("std");
|
|
|
|
|
|
|
|
/// Location and description of a problem in the source document.
///
/// NOTE(review): nothing in the visible code writes to this after
/// initialization; field meanings below are inferred from their names and
/// should be confirmed once diagnostics reporting is hooked up.
pub const Diagnostics = struct {
    /// Presumably the row (line number) the message refers to.
    row: usize,
    /// Presumably the byte range of the offending text: `absolute` relative to
    /// the whole buffer, `line_offset` relative to the start of the row.
    span: struct { absolute: usize, line_offset: usize, length: usize },
    /// Human-readable description.
    message: []const u8,
};
|
|
|
|
|
|
|
|
/// Line-oriented tokenizer. Walks `buffer` one LF-terminated line at a time,
/// classifies each line (comment, inline value, list item, or map item) and
/// reports how its indentation changed relative to the previously consumed
/// line.
pub const LineTokenizer = struct {
    /// The complete document text being tokenized.
    buffer: []const u8,
    /// Byte offset into `buffer` of the first unconsumed character.
    index: usize = 0,
    /// Indentation style detected so far. For spaces, the payload is the
    /// number of spaces per indentation level (0 until the first indented
    /// line has been measured).
    indentation: IndentationType = .immaterial,
    /// Quantized indentation level recorded after the last consumed line.
    last_indent: usize = 0,
    /// Caller-supplied diagnostics record. No code in this struct writes to it.
    diagnostics: *Diagnostics,

    /// Count of rows consumed so far, blank rows included.
    row: usize = 0,

    const Error = error{
        /// A CR was found, an indicator (`-`, `:`, `>`, `|`) was not followed
        /// by a space, or a flow list/map was not closed on the same line.
        BadToken,
        /// Both tabs and spaces were used for indentation.
        MixedIndentation,
        /// A line's indentation is not a multiple of the detected quantum.
        UnquantizedIndentation,
        /// A line is indented more than one level past the previous line.
        TooMuchIndentation,
        /// A non-blank line was not terminated by LF.
        MissingNewline,
        /// A whitespace-only line, or a value ending in a space or tab.
        TrailingWhitespace,
        /// An empty line reached the classifier; blank lines are skipped
        /// before that point, so this should never be returned.
        Impossible,
    };

    const IndentationType = union(enum) {
        /// No indented line has been seen yet.
        immaterial: void,
        /// Space indentation; payload is the number of spaces per level.
        spaces: usize,
        /// Tab indentation; one tab per level.
        tabs: void,
    };

    /// The value portion of a line: whatever follows `- `, `key: `, or the
    /// whole line when it is a bare value.
    const InlineItem = union(enum) {
        empty: void,
        scalar: []const u8,
        /// A `>` string; lines are joined with LF.
        line_string: []const u8,
        /// A `|` string; lines are joined with a single space.
        space_string: []const u8,

        /// Flow list text with the opening `[` removed and the closing `]` kept.
        flow_list: []const u8,
        /// Flow map text with the opening `{` removed and the closing `}` kept.
        flow_map: []const u8,

        /// Returns the character used to join consecutive lines of a
        /// multi-line string. Only valid for `.line_string` and
        /// `.space_string`; any other tag is a programming error.
        fn lineEnding(self: InlineItem) u8 {
            return switch (self) {
                .line_string => '\n',
                .space_string => ' ',
                else => unreachable,
            };
        }
    };

    const LineContents = union(enum) {
        /// Text following the leading `#`.
        comment: []const u8,

        in_line: InlineItem,
        list_item: InlineItem,
        map_item: struct { key: []const u8, val: InlineItem },
    };

    // we can dedent multiple levels at once. Example:
    //
    // foo:
    //   bar:
    //     > a
    //     > string
    // baz: [qux]
    //
    // capturing this is conceptually simple, but implementing it without complex
    // indentation tracking requires quantizing the indentation. This means our
    // IndentationType will also need to track the number of spaces used for
    // indentation, as detected. Then every line we have to check indent rem the
    // quantization level == 0 (otherwise we broke quantization) and compute indent
    // div the quantization level to give us our effective indentation level.

    const ShiftDirection = enum { indent, dedent, none };
    const RelativeIndent = union(ShiftDirection) {
        indent: void,
        /// Number of levels dropped relative to the previous line.
        dedent: usize,
        none: void,
    };

    const Line = struct {
        indent: RelativeIndent,
        contents: LineContents,
        /// The line's text without leading indentation or the trailing LF.
        raw: []const u8,
    };

    /// Consumes and classifies the next non-blank line.
    ///
    /// Returns `null` once the buffer is exhausted, including when only blank
    /// lines and/or whitespace remain. Blank rows are skipped without being
    /// emitted. Comment lines are emitted with `.indent = .none` and do not
    /// change the tracked indentation level, but their indentation is still
    /// validated against the indent quantum.
    pub fn next(self: *LineTokenizer) Error!?Line {
        if (self.index == self.buffer.len) return null;

        // `indent` counts leading whitespace on the current row; `offset` is
        // the number of bytes (relative to `self.index`) consumed so far.
        var indent: usize = 0;
        var offset: usize = 0;

        for (self.buffer[self.index..], 0..) |char, idx| {
            switch (char) {
                ' ' => {
                    switch (self.indentation) {
                        // There's a weird coupling here because we can't set this until
                        // all spaces have been consumed. I also thought about ignoring
                        // spaces on comment lines since those don't affect the
                        // relative indent/dedent, but then we would allow comments
                        // to ignore our indent quantum, which I dislike due to it making
                        // ugly documents.
                        .immaterial => self.indentation = .{ .spaces = 0 },
                        .spaces => {},
                        .tabs => return error.MixedIndentation,
                    }
                    indent += 1;
                },
                '\t' => {
                    switch (self.indentation) {
                        .immaterial => self.indentation = .tabs,
                        .spaces => return error.MixedIndentation,
                        .tabs => {},
                    }
                    indent += 1;
                },
                '\r' => {
                    return error.BadToken;
                },
                '\n' => {
                    // don't even emit anything for empty rows.
                    self.row += 1;
                    offset = idx + 1;
                    // if it's too hard to deal with, Just Make It An Error!!!
                    // an empty line with whitespace on it is garbage. It can mess with
                    // the indentation detection grossly in a way that is annoying to
                    // deal with. Besides, having whitespace-only lines in a document
                    // is essentially terrorism, with which negotiations are famously
                    // not permitted.
                    if (indent > 0) return error.TrailingWhitespace;
                },
                else => break,
            }
        } else {
            // The loop ran off the end of the buffer, so everything that was
            // left is blank rows (counted by `offset`, which ends just past the
            // last LF) followed by `indent` whitespace characters. Together
            // they account for exactly the remainder of the buffer.
            std.debug.assert(self.buffer.len == self.index + indent + offset);
            self.index = self.buffer.len;
            // this prong will get hit when the document only consists of whitespace
            return null;
        }

        // Convert the raw whitespace count into an indentation level. The
        // first indented line seen defines the number of spaces per level.
        var quantized: usize = if (self.indentation == .spaces) blk: {
            if (self.indentation.spaces == 0) {
                self.indentation.spaces = indent;
            }
            if (@rem(indent, self.indentation.spaces) != 0)
                return error.UnquantizedIndentation;

            break :blk @divExact(indent, self.indentation.spaces);
        } else indent;

        const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
            if ((quantized - self.last_indent) > 1)
                return error.TooMuchIndentation;
            break :rel .indent;
        } else if (quantized < self.last_indent)
            .{ .dedent = self.last_indent - quantized }
        else
            .none;

        offset += indent;

        // Runs on every exit below (including error returns), using the final
        // values of `quantized` and `offset`.
        defer {
            self.row += 1;
            self.last_indent = quantized;
            self.index += offset;
        }

        const line = try consumeLine(self.buffer[self.index + offset ..]);
        // + 1 to step over the terminating LF
        offset += line.len + 1;

        // this should not be possible, as empty lines are caught earlier.
        if (line.len == 0) return error.Impossible;

        switch (line[0]) {
            '#' => {
                // simply lie about indentation when the line is a comment.
                quantized = self.last_indent;
                return .{
                    .indent = .none,
                    .contents = .{ .comment = line[1..] },
                    .raw = line,
                };
            },
            '|', '>', '[', '{' => {
                return .{
                    .indent = relative,
                    .contents = .{ .in_line = try detectInlineItem(line) },
                    .raw = line,
                };
            },
            '-' => {
                // a dash must either be alone on the line or followed by a space
                if (line.len > 1 and line[1] != ' ') return error.BadToken;

                return if (line.len == 1) .{
                    .indent = relative,
                    .contents = .{ .list_item = .empty },
                    .raw = line,
                } else .{
                    .indent = relative,
                    .contents = .{ .list_item = try detectInlineItem(line[2..]) },
                    .raw = line,
                };
            },
            else => {
                // The first ':' splits key from value; a line without one is
                // a bare scalar.
                for (line, 0..) |char, idx| {
                    if (char == ':') {
                        if (idx + 1 == line.len) return .{
                            .indent = relative,
                            .contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
                            .raw = line,
                        };

                        if (line[idx + 1] != ' ') return error.BadToken;

                        return .{
                            .indent = relative,
                            .contents = .{ .map_item = .{
                                .key = line[0..idx],
                                .val = try detectInlineItem(line[idx + 2 ..]),
                            } },
                            .raw = line,
                        };
                    }
                }

                return .{
                    .indent = relative,
                    .contents = .{ .in_line = .{ .scalar = line } },
                    .raw = line,
                };
            },
        }
    }

    /// Classifies the value portion of a line by its first character.
    ///
    /// An empty `buf` yields `.empty`. Strings (`>` / `|`) must have a space
    /// after the indicator unless the indicator is the whole buffer. Flow
    /// lists and maps must close on the same line. Strings and scalars may not
    /// end in a space or tab.
    fn detectInlineItem(buf: []const u8) Error!InlineItem {
        if (buf.len == 0) return .empty;

        switch (buf[0]) {
            '>', '|' => |char| {
                if (buf.len > 1 and buf[1] != ' ') return error.BadToken;

                // The payload starts after the indicator and its space. A
                // trailing '|' is a terminator (it allows trailing whitespace
                // inside the string) and is dropped, unless it is the
                // indicator itself (buf.len == 1).
                const slice: []const u8 = switch (buf[buf.len - 1]) {
                    ' ', '\t' => return error.TrailingWhitespace,
                    '|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
                    else => buf[@min(2, buf.len)..buf.len],
                };

                return if (char == '>')
                    .{ .line_string = slice }
                else
                    .{ .space_string = slice };
            },
            '[' => {
                if (buf.len < 2 or buf[buf.len - 1] != ']')
                    return error.BadToken;

                // keep the closing ] for the flow parser
                return .{ .flow_list = buf[1..] };
            },
            '{' => {
                if (buf.len < 2 or buf[buf.len - 1] != '}')
                    return error.BadToken;

                // keep the closing } for the flow parser
                return .{ .flow_map = buf[1..] };
            },
            else => {
                if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
                    return error.TrailingWhitespace;

                return .{ .scalar = buf };
            },
        }
    }

    /// Returns the prefix of `buf` up to (not including) the first LF.
    ///
    /// Returns `error.BadToken` if a CR appears before that LF and
    /// `error.MissingNewline` if `buf` contains neither.
    fn consumeLine(buf: []const u8) Error![]const u8 {
        for (buf, 0..) |char, idx| {
            switch (char) {
                '\n' => return buf[0..idx],
                '\r' => return error.BadToken,
                else => {},
            }
        }

        return error.MissingNewline;
    }
};
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, The code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
/// A node of a parsed document: a string, a list of values, or a map from
/// string keys to values.
pub const Value = union(enum) {
    pub const String = std.ArrayList(u8);
    pub const Map = std.StringHashMap(Value);
    pub const List = std.ArrayList(Value);

    string: String,
    list: List,
    map: Map,

    /// Builds a string value containing a copy of `input`, with storage
    /// allocated from `alloc`. Fails only if the allocation fails.
    pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
        var copy = try String.initCapacity(alloc, input.len);
        // capacity for the whole input was reserved above
        copy.appendSliceAssumeCapacity(input);
        return .{ .string = copy };
    }

    /// Builds an empty string value backed by `alloc`.
    pub inline fn newString(alloc: std.mem.Allocator) Value {
        const empty_string = String.init(alloc);
        return .{ .string = empty_string };
    }

    /// Builds an empty list value backed by `alloc`.
    pub inline fn newList(alloc: std.mem.Allocator) Value {
        const empty_list = List.init(alloc);
        return .{ .list = empty_list };
    }

    /// Builds an empty map value backed by `alloc`.
    pub inline fn newMap(alloc: std.mem.Allocator) Value {
        const empty_map = Map.init(alloc);
        return .{ .map = empty_map };
    }

    /// Writes a human-readable rendering of this value, followed by a
    /// newline, via `std.debug.print`.
    pub fn printDebug(self: Value) void {
        self.printRecursive(0);
        std.debug.print("\n", .{});
    }

    /// Renders this value via `std.debug.print`, padding nested output by
    /// `indent` columns. Does not emit a trailing newline.
    fn printRecursive(self: Value, indent: usize) void {
        switch (self) {
            .string => |text| {
                // single-line strings are printed inline, as-is
                if (std.mem.indexOfScalar(u8, text.items, '\n') == null) {
                    std.debug.print("{s}", .{text.items});
                    return;
                }

                // multi-line strings start on a fresh row, one padded row per
                // line, with no newline after the last one
                std.debug.print("\n", .{});
                var rows = std.mem.splitScalar(u8, text.items, '\n');
                while (rows.next()) |row| {
                    const terminator: []const u8 = if (rows.peek() == null) "" else "\n";
                    std.debug.print(
                        "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                        .{
                            .empty = "",
                            .indent = indent,
                            .line = row,
                            .nl = terminator,
                        },
                    );
                }
            },
            .list => |elements| {
                if (elements.items.len == 0) {
                    std.debug.print("[]", .{});
                    return;
                }

                std.debug.print("[\n", .{});
                for (elements.items, 0..) |element, position| {
                    std.debug.print(
                        "{[empty]s: >[indent]}[{[idx]d}] = ",
                        .{ .empty = "", .indent = indent, .idx = position },
                    );
                    element.printRecursive(indent + 2);
                    std.debug.print(",\n", .{});
                }
                std.debug.print("{[empty]s: >[indent]}]", .{ .empty = "", .indent = indent });
            },
            .map => |table| {
                if (table.count() == 0) {
                    std.debug.print("{{}}", .{});
                    return;
                }

                std.debug.print("{{\n", .{});

                var entries = table.iterator();
                while (entries.next()) |kv| {
                    std.debug.print(
                        "{[empty]s: >[indent]}{[key]s}: ",
                        .{ .empty = "", .indent = indent + 2, .key = kv.key_ptr.* },
                    );
                    kv.value_ptr.printRecursive(indent + 4);
                    std.debug.print(",\n", .{});
                }
                std.debug.print("{[empty]s: >[indent]}}}", .{ .empty = "", .indent = indent });
            },
        }
    }
};
|
|
|
|
|
2023-09-13 00:11:45 -07:00
|
|
|
pub const Parser = struct {
|
|
|
|
/// Allocator passed to `Document.init` for every document this parser builds.
allocator: std.mem.Allocator,
/// Duplicate-key policy; forwarded to the flow list/map parsers.
dupe_behavior: DuplicateKeyBehavior = .fail,
/// NOTE(review): not read anywhere in the visible part of the parser —
/// confirm where (or whether) this is consulted.
default_object: DefaultObject = .fail,
/// Diagnostics record; a pointer to it is handed to the line tokenizer.
diagnostics: Diagnostics = .{
    .row = 0,
    .span = .{ .absolute = 0, .line_offset = 0, .length = 0 },
    .message = "all is well",
},
|
|
|
|
|
|
|
|
/// Every error `Parser` can return: its own parse errors merged with those
/// of the line tokenizer, the flow parser, and the allocator.
pub const Error = error{
    /// A line was indented where no nested content is allowed.
    UnexpectedIndent,
    UnexpectedValue,
    ExtraContent,
    EmptyDocument,
    DuplicateKey,
    BadMapEntry,
    Fail,
} || LineTokenizer.Error || FlowParser.Error || std.mem.Allocator.Error;
|
2023-09-13 00:11:45 -07:00
|
|
|
|
|
|
|
/// Policy for a map key that appears more than once.
///
/// NOTE(review): the handling code is outside the visible source; the
/// per-member meanings are taken from the names and should be confirmed.
pub const DuplicateKeyBehavior = enum {
    /// Presumably keeps the first value seen for the key.
    use_first,
    /// Presumably keeps the most recent value seen for the key.
    use_last,
    /// Presumably rejects the document.
    fail,
};
|
|
|
|
|
|
|
|
/// Selects a kind of default value, or failure.
///
/// NOTE(review): no use of this type is visible in this part of the file;
/// presumably it decides what an empty document or value becomes — confirm
/// against the rest of the parser.
pub const DefaultObject = enum {
    string,
    list,
    map,
    fail,
};
|
|
|
|
|
|
|
|
/// Top-level state of the block parser's state machine.
pub const ParseState = enum {
    /// No content line has been consumed yet; the next one decides the type
    /// of the document root.
    initial,
    /// The root exists and further lines are interpreted relative to the
    /// value on top of the parse stack.
    value,
    /// The root was completed by a single line (a scalar, flow list, or flow
    /// map).
    done,
};
|
|
|
|
|
|
|
|
/// A parsed document: the root value plus the arena its allocator is drawn
/// from.
pub const Document = struct {
    arena: std.heap.ArenaAllocator,
    root: Value,

    /// Creates a document whose arena is backed by `alloc`.
    ///
    /// `root` is left undefined; it must be assigned before it is read.
    pub fn init(alloc: std.mem.Allocator) Document {
        const backing = std.heap.ArenaAllocator.init(alloc);
        return .{ .arena = backing, .root = undefined };
    }

    /// Prints the root value via `Value.printDebug`.
    pub fn printDebug(self: Document) void {
        self.root.printDebug();
    }

    /// Frees everything allocated from the document's arena.
    pub fn deinit(self: Document) void {
        self.arena.deinit();
    }
};
|
|
|
|
|
2023-09-14 23:38:24 -07:00
|
|
|
pub fn parseBuffer(self: *Parser, buffer: []const u8) Error!Document {
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, The code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
var document = Document.init(self.allocator);
|
2023-09-13 00:11:45 -07:00
|
|
|
errdefer document.deinit();
|
|
|
|
const arena_alloc = document.arena.allocator();
|
|
|
|
|
|
|
|
var state: ParseState = .initial;
|
|
|
|
var expect_shift: LineTokenizer.ShiftDirection = .none;
|
2023-09-17 19:28:07 -07:00
|
|
|
var dangling_key: ?[]const u8 = null;
|
2023-09-13 00:11:45 -07:00
|
|
|
var stack = std.ArrayList(*Value).init(arena_alloc);
|
|
|
|
defer stack.deinit();
|
|
|
|
|
|
|
|
var tok: LineTokenizer = .{ .buffer = buffer, .diagnostics = &self.diagnostics };
|
|
|
|
while (try tok.next()) |line| {
|
|
|
|
if (line.contents == .comment) continue;
|
|
|
|
|
|
|
|
var flip = true;
|
|
|
|
var flop = false;
|
|
|
|
// this is needed to give us a second go round when the line is dedented
|
|
|
|
flipflop: while (flip) : (flop = true) {
|
|
|
|
switch (state) {
|
|
|
|
.initial => {
|
|
|
|
if (line.indent == .indent) return error.UnexpectedIndent;
|
|
|
|
|
|
|
|
switch (line.contents) {
|
|
|
|
// we filter out comments above
|
|
|
|
.comment => unreachable,
|
|
|
|
.in_line => |in_line| switch (in_line) {
|
|
|
|
// empty scalars are only emitted for a list_item or a map_item
|
|
|
|
.empty => unreachable,
|
|
|
|
.scalar => |str| {
|
|
|
|
document.root = try valueFromString(arena_alloc, str);
|
2023-09-17 23:09:26 -07:00
|
|
|
// this is a cheesy hack. If the document consists
|
|
|
|
// solely of a scalar, the finalizer will try to
|
|
|
|
// chop a line ending off of it, so we need to add
|
|
|
|
// a sacrificial padding character to avoid
|
|
|
|
// chopping off something that matters.
|
|
|
|
try document.root.string.append(' ');
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .done;
|
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string => |str| {
|
2023-09-14 23:38:24 -07:00
|
|
|
document.root = try valueFromString(arena_alloc, str);
|
2023-09-17 23:09:26 -07:00
|
|
|
try document.root.string.append(in_line.lineEnding());
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(&document.root);
|
|
|
|
state = .value;
|
|
|
|
},
|
2023-09-13 00:11:45 -07:00
|
|
|
.flow_list => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
document.root = try parseFlowList(arena_alloc, str, self.dupe_behavior);
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .done;
|
|
|
|
},
|
|
|
|
.flow_map => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
document.root = try parseFlowMap(arena_alloc, str, self.dupe_behavior);
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .done;
|
|
|
|
},
|
|
|
|
},
|
|
|
|
.list_item => |value| {
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, The code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
document.root = .{ .list = Value.List.init(arena_alloc) };
|
2023-09-13 00:11:45 -07:00
|
|
|
try stack.append(&document.root);
|
|
|
|
|
|
|
|
switch (value) {
|
|
|
|
.empty => {
|
|
|
|
expect_shift = .indent;
|
|
|
|
state = .value;
|
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| {
|
|
|
|
try document.root.list.append(try valueFromString(arena_alloc, str));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
.flow_list => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
try document.root.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
.flow_map => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
try document.root.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
|
|
|
.map_item => |pair| {
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, The code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
document.root = .{ .map = Value.Map.init(arena_alloc) };
|
2023-09-13 00:11:45 -07:00
|
|
|
try stack.append(&document.root);
|
|
|
|
|
|
|
|
switch (pair.val) {
|
|
|
|
.empty => {
|
|
|
|
expect_shift = .indent;
|
|
|
|
// If the key is on its own line, we don't have
|
|
|
|
// an associated value until we parse the next
|
|
|
|
// line. We need to store a reference to this
|
|
|
|
// key somewhere until we can consume the
|
|
|
|
// value. More parser state to lug along.
|
|
|
|
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = pair.key;
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| {
|
2023-09-13 00:11:45 -07:00
|
|
|
// we can do direct puts here because this is
|
|
|
|
// the very first line of the document
|
2023-09-17 23:09:26 -07:00
|
|
|
try document.root.map.put(pair.key, try valueFromString(arena_alloc, str));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
.flow_list => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
try document.root.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
.flow_map => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
try document.root.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior));
|
2023-09-13 00:11:45 -07:00
|
|
|
state = .value;
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
.value => switch (stack.getLast().*) {
|
2023-09-13 00:11:45 -07:00
|
|
|
.string => |*string| {
|
2023-09-17 23:09:26 -07:00
|
|
|
if (line.indent == .indent)
|
|
|
|
return error.UnexpectedIndent;
|
|
|
|
|
2023-09-13 00:11:45 -07:00
|
|
|
if (!flop and line.indent == .dedent) {
|
2023-09-17 23:09:26 -07:00
|
|
|
// kick off the last trailing space or newline
|
2023-09-17 19:28:07 -07:00
|
|
|
_ = string.pop();
|
2023-09-13 00:11:45 -07:00
|
|
|
|
|
|
|
var dedent_depth = line.indent.dedent;
|
|
|
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
|
|
|
_ = stack.pop();
|
|
|
|
|
|
|
|
continue :flipflop;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (line.contents) {
|
|
|
|
.comment => unreachable,
|
|
|
|
.in_line => |in_line| switch (in_line) {
|
|
|
|
.empty => unreachable,
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string => |str| {
|
2023-09-14 23:38:24 -07:00
|
|
|
try string.appendSlice(str);
|
2023-09-17 23:09:26 -07:00
|
|
|
try string.append(in_line.lineEnding());
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
2023-09-13 00:11:45 -07:00
|
|
|
else => return error.UnexpectedValue,
|
|
|
|
},
|
|
|
|
else => return error.UnexpectedValue,
|
|
|
|
}
|
|
|
|
},
|
|
|
|
.list => |*list| {
|
2023-09-17 23:09:26 -07:00
|
|
|
// detect that the previous item was actually empty
|
|
|
|
//
|
|
|
|
// -
|
|
|
|
// - something
|
|
|
|
//
|
|
|
|
// the first line here creates the expect_shift, but the second line
|
|
|
|
// is a valid continuation of the list despite not being indented
|
2023-09-14 23:38:24 -07:00
|
|
|
if (expect_shift == .indent and line.indent != .indent)
|
|
|
|
try list.append(try valueFromString(arena_alloc, ""));
|
|
|
|
|
2023-09-13 00:11:45 -07:00
|
|
|
// Consider:
|
|
|
|
//
|
2023-09-17 23:09:26 -07:00
|
|
|
// -
|
|
|
|
// own-line scalar
|
|
|
|
// - inline scalar
|
2023-09-13 00:11:45 -07:00
|
|
|
//
|
|
|
|
// the own-line scalar will not push the stack but the next list item will be a dedent
|
|
|
|
if (!flop and line.indent == .dedent) {
|
|
|
|
// if line.indent.dedent is 1 and we're expecting it, the stack will not be popped,
|
|
|
|
// but we will continue loop flipflop. However, flop will be set to false on the next
|
|
|
|
// trip, so this if prong will not be run again.
|
|
|
|
var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);
|
|
|
|
|
|
|
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
|
|
|
_ = stack.pop();
|
|
|
|
|
|
|
|
continue :flipflop;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (line.contents) {
|
|
|
|
.comment => unreachable,
|
|
|
|
.in_line => |in_line| {
|
|
|
|
// assert that this line has been indented. this is required for an inline value when
|
|
|
|
// the stack is in list mode.
|
2023-09-14 23:38:24 -07:00
|
|
|
if (expect_shift != .indent or line.indent != .indent)
|
|
|
|
return error.UnexpectedValue;
|
2023-09-13 00:11:45 -07:00
|
|
|
|
2023-09-14 23:38:24 -07:00
|
|
|
expect_shift = .dedent;
|
2023-09-13 00:11:45 -07:00
|
|
|
switch (in_line) {
|
|
|
|
.empty => unreachable,
|
2023-09-14 23:38:24 -07:00
|
|
|
.scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string => |str| {
|
2023-09-13 00:11:45 -07:00
|
|
|
// string pushes the stack
|
2023-09-14 23:38:24 -07:00
|
|
|
const new_string = try appendListGetValue(list, try valueFromString(arena_alloc, str));
|
|
|
|
|
2023-09-17 23:09:26 -07:00
|
|
|
try new_string.string.append(in_line.lineEnding());
|
2023-09-14 23:38:24 -07:00
|
|
|
|
|
|
|
try stack.append(new_string);
|
2023-09-13 00:11:45 -07:00
|
|
|
expect_shift = .none;
|
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
|
|
|
},
|
|
|
|
.list_item => |value| {
|
|
|
|
switch (line.indent) {
|
|
|
|
// for dedent, the stack has already been popped, so this should be fine
|
|
|
|
.none, .dedent => {
|
|
|
|
expect_shift = .none;
|
|
|
|
switch (value) {
|
|
|
|
.empty => expect_shift = .indent,
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try list.append(try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
2023-09-13 00:11:45 -07:00
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
// a new list is being created
|
|
|
|
.indent => {
|
|
|
|
if (expect_shift != .indent)
|
|
|
|
return error.UnexpectedIndent;
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
const new_list = try appendListGetValue(list, .{ .list = Value.List.init(arena_alloc) });
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(new_list);
|
|
|
|
|
|
|
|
expect_shift = .none;
|
|
|
|
switch (value) {
|
|
|
|
.empty => expect_shift = .indent,
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
2023-09-13 00:11:45 -07:00
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
.map_item => |pair| {
|
|
|
|
// this prong cannot be hit on dedent in a valid way.
|
|
|
|
//
|
|
|
|
// -
|
|
|
|
// map: value
|
|
|
|
// second: value
|
|
|
|
// third: value
|
|
|
|
//
|
|
|
|
// dedenting back to the list stack level requires list_item
|
|
|
|
|
|
|
|
if (line.indent != .indent)
|
|
|
|
return error.UnexpectedValue;
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
const new_map = try appendListGetValue(list, .{ .map = Value.Map.init(arena_alloc) });
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(new_map);
|
|
|
|
expect_shift = .none;
|
|
|
|
|
|
|
|
switch (pair.val) {
|
|
|
|
.empty => {
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = pair.key;
|
2023-09-14 23:38:24 -07:00
|
|
|
expect_shift = .indent;
|
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
2023-09-13 00:11:45 -07:00
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
.map => |*map| {
|
2023-09-17 23:09:26 -07:00
|
|
|
// detect that the previous item was actually empty
|
|
|
|
//
|
|
|
|
// foo:
|
|
|
|
// bar: baz
|
|
|
|
//
|
|
|
|
// the first line here creates the expect_shift, but the second line
|
|
|
|
// is a valid continuation of the map despite not being indented
|
2023-09-14 23:38:24 -07:00
|
|
|
if (expect_shift == .indent and line.indent != .indent) {
|
2023-09-17 19:28:07 -07:00
|
|
|
try putMap(
|
2023-09-14 23:38:24 -07:00
|
|
|
map,
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key orelse return error.Fail,
|
2023-09-14 23:38:24 -07:00
|
|
|
try valueFromString(arena_alloc, ""),
|
2023-09-17 19:28:07 -07:00
|
|
|
self.dupe_behavior,
|
2023-09-14 23:38:24 -07:00
|
|
|
);
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = null;
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
|
|
|
|
2023-09-13 00:11:45 -07:00
|
|
|
if (!flop and line.indent == .dedent) {
|
2023-09-14 23:38:24 -07:00
|
|
|
var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);
|
|
|
|
|
|
|
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
|
|
|
_ = stack.pop();
|
|
|
|
|
2023-09-13 00:11:45 -07:00
|
|
|
continue :flipflop;
|
|
|
|
}
|
2023-09-14 23:38:24 -07:00
|
|
|
|
|
|
|
switch (line.contents) {
|
|
|
|
.comment => unreachable,
|
|
|
|
.in_line => |in_line| {
|
|
|
|
// assert that this line has been indented. this is required for an inline value when
|
|
|
|
// the stack is in map mode.
|
2023-09-17 19:28:07 -07:00
|
|
|
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
|
2023-09-14 23:38:24 -07:00
|
|
|
return error.UnexpectedValue;
|
|
|
|
|
|
|
|
expect_shift = .dedent;
|
|
|
|
|
|
|
|
switch (in_line) {
|
|
|
|
.empty => unreachable,
|
2023-09-17 19:28:07 -07:00
|
|
|
.scalar => |str| try putMap(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior),
|
|
|
|
.flow_list => |str| try putMap(map, dangling_key.?, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
|
2023-09-14 23:38:24 -07:00
|
|
|
.flow_map => |str| {
|
2023-09-17 19:28:07 -07:00
|
|
|
try putMap(map, dangling_key.?, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior);
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string => |str| {
|
2023-09-14 23:38:24 -07:00
|
|
|
// string pushes the stack
|
2023-09-17 19:28:07 -07:00
|
|
|
const new_string = try putMapGetValue(map, dangling_key.?, try valueFromString(arena_alloc, str), self.dupe_behavior);
|
2023-09-17 23:09:26 -07:00
|
|
|
try new_string.string.append(in_line.lineEnding());
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(new_string);
|
|
|
|
expect_shift = .none;
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = null;
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
|
|
|
.list_item => |value| {
|
|
|
|
// this prong cannot be hit on dedent in a valid way.
|
|
|
|
//
|
|
|
|
// map:
|
|
|
|
// - value
|
|
|
|
// - invalid
|
|
|
|
//
|
|
|
|
// dedenting back to the map stack level requires map_item
|
|
|
|
|
2023-09-17 19:28:07 -07:00
|
|
|
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
|
2023-09-14 23:38:24 -07:00
|
|
|
return error.UnexpectedValue;
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
const new_list = try putMapGetValue(map, dangling_key.?, .{ .list = Value.List.init(arena_alloc) }, self.dupe_behavior);
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(new_list);
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = null;
|
2023-09-14 23:38:24 -07:00
|
|
|
|
|
|
|
expect_shift = .none;
|
|
|
|
switch (value) {
|
|
|
|
.empty => expect_shift = .indent,
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try new_list.list.append(try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try new_list.list.append(try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try new_list.list.append(try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
|
|
|
},
|
|
|
|
.map_item => |pair| {
|
|
|
|
expect_shift = .none;
|
|
|
|
switch (line.indent) {
|
|
|
|
// for dedent, the stack has already been popped, so this should be fine
|
|
|
|
.none, .dedent => switch (pair.val) {
|
|
|
|
.empty => {
|
|
|
|
expect_shift = .indent;
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = pair.key;
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try putMap(map, pair.key, try valueFromString(arena_alloc, str), self.dupe_behavior),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try putMap(map, pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
|
|
|
|
.flow_map => |str| try putMap(map, pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior), self.dupe_behavior),
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
|
|
|
// a new map is being created
|
|
|
|
.indent => {
|
2023-09-17 19:28:07 -07:00
|
|
|
if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;
|
2023-09-14 23:38:24 -07:00
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
const new_map = try putMapGetValue(map, dangling_key.?, .{ .map = Value.Map.init(arena_alloc) }, self.dupe_behavior);
|
2023-09-14 23:38:24 -07:00
|
|
|
try stack.append(new_map);
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = null;
|
2023-09-14 23:38:24 -07:00
|
|
|
|
|
|
|
switch (pair.val) {
|
|
|
|
.empty => {
|
|
|
|
expect_shift = .indent;
|
2023-09-17 19:28:07 -07:00
|
|
|
dangling_key = pair.key;
|
2023-09-14 23:38:24 -07:00
|
|
|
},
|
2023-09-17 23:09:26 -07:00
|
|
|
.line_string, .space_string, .scalar => |str| try new_map.map.put(pair.key, try valueFromString(arena_alloc, str)),
|
2023-09-17 19:28:07 -07:00
|
|
|
.flow_list => |str| try new_map.map.put(pair.key, try parseFlowList(arena_alloc, str, self.dupe_behavior)),
|
|
|
|
.flow_map => |str| try new_map.map.put(pair.key, try parseFlowMap(arena_alloc, str, self.dupe_behavior)),
|
2023-09-14 23:38:24 -07:00
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
2023-09-13 00:11:45 -07:00
|
|
|
},
|
|
|
|
},
|
|
|
|
.done => return error.ExtraContent,
|
|
|
|
}
|
|
|
|
|
2023-09-14 23:38:24 -07:00
|
|
|
// this is specifically performed at the end of the loop body so that
|
|
|
|
// `continue :flipflop` skips setting it.
|
|
|
|
flip = false;
|
|
|
|
}
|
2023-09-13 00:11:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
switch (state) {
|
|
|
|
.initial => switch (self.default_object) {
|
|
|
|
.string => document.root = .{ .string = std.ArrayList(u8).init(arena_alloc) },
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
.list => document.root = .{ .list = Value.List.init(arena_alloc) },
|
|
|
|
.map => document.root = .{ .map = Value.Map.init(arena_alloc) },
|
2023-09-13 00:11:45 -07:00
|
|
|
.fail => return error.EmptyDocument,
|
|
|
|
},
|
2023-09-14 23:38:24 -07:00
|
|
|
.value => switch (stack.getLast().*) {
|
|
|
|
// remove the final trailing newline or space
|
2023-09-17 19:28:07 -07:00
|
|
|
.string => |*string| _ = string.popOrNull(),
|
2023-09-14 23:38:24 -07:00
|
|
|
// if we have a dangling -, attach an empty string to it
|
|
|
|
.list => |*list| if (expect_shift == .indent) try list.append(try valueFromString(arena_alloc, "")),
|
2023-09-17 19:28:07 -07:00
|
|
|
// if we have a dangling "key:", attach an empty string to it
|
|
|
|
.map => |*map| if (dangling_key) |dk| try putMap(map, dk, try valueFromString(arena_alloc, ""), self.dupe_behavior),
|
2023-09-13 00:11:45 -07:00
|
|
|
},
|
|
|
|
.done => {},
|
|
|
|
}
|
|
|
|
|
|
|
|
return document;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Build a `.string` Value whose bytes are a copy of `buffer`.
///
/// The backing storage is allocated from `alloc` with exactly enough
/// capacity for `buffer`, so the copy itself cannot fail once the
/// allocation has succeeded.
fn valueFromString(alloc: std.mem.Allocator, buffer: []const u8) Error!Value {
    var bytes = try std.ArrayList(u8).initCapacity(alloc, buffer.len);
    // capacity was reserved above, so no allocation happens here
    bytes.appendSliceAssumeCapacity(buffer);
    return .{ .string = bytes };
}
|
|
|
|
|
2023-09-17 19:47:18 -07:00
|
|
|
/// Parse `contents` as a flow-style list and return the resulting Value.
///
/// A temporary `FlowParser` is created with `alloc` and torn down again
/// before returning. `dupe_behavior` is forwarded unchanged to the flow
/// parser's `parse`.
fn parseFlowList(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
    var flow = try FlowParser.initList(alloc, contents);
    defer flow.deinit();

    return flow.parse(dupe_behavior);
}
|
|
|
|
|
2023-09-17 19:47:18 -07:00
|
|
|
/// Parse `contents` as a flow-style map and return the resulting Value.
///
/// A temporary `FlowParser` is created with `alloc` and torn down again
/// before returning. `dupe_behavior` is forwarded unchanged to the flow
/// parser's `parse`.
fn parseFlowMap(alloc: std.mem.Allocator, contents: []const u8, dupe_behavior: DuplicateKeyBehavior) Error!Value {
    var flow = try FlowParser.initMap(alloc, contents);
    defer flow.deinit();

    return flow.parse(dupe_behavior);
}
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
/// Append `value` to `list` and return a pointer to the element that was
/// just stored (the last entry of `list.items`).
///
/// NOTE(review): the pointer refers into the list's item storage, so it is
/// presumably invalidated if the list grows again; confirm against
/// the definition of `Value.List`.
inline fn appendListGetValue(list: *Value.List, value: Value) Error!*Value {
    try list.append(value);
    const last_index = list.items.len - 1;
    return &list.items[last_index];
}
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
/// Insert `value` under `key`, resolving an already-present key according to
/// `dupe_behavior`.
///
/// Thin wrapper around `putMapGetValue` for callers that do not need a
/// pointer to the stored value. Propagates its errors unchanged.
inline fn putMap(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!void {
    _ = try putMapGetValue(
        map,
        key,
        value,
        dupe_behavior,
    );
}
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
/// Insert `value` under `key` and return a pointer to the value stored in the
/// map for that key.
///
/// When `key` is already present, `dupe_behavior` decides the outcome:
///   - `.fail`: returns `error.DuplicateKey`.
///   - `.use_first`: the existing value is kept and `value` is not stored.
///   - `.use_last`: the existing value is overwritten with `value`.
///
/// Note that under `.use_first` the returned pointer refers to the value that
/// was already in the map, not to `value`.
inline fn putMapGetValue(map: *Value.Map, key: []const u8, value: Value, dupe_behavior: DuplicateKeyBehavior) Error!*Value {
    const entry = try map.getOrPut(key);

    // Fresh key: store the value and hand back its slot.
    if (!entry.found_existing) {
        entry.value_ptr.* = value;
        return entry.value_ptr;
    }

    switch (dupe_behavior) {
        .fail => return error.DuplicateKey,
        .use_first => {},
        .use_last => entry.value_ptr.* = value,
    }

    return entry.value_ptr;
}
|
|
|
|
|
|
|
|
/// Tokenize `buf` line by line and print a debug summary of every line via
/// `dumpLine`.
///
/// Tokenizer diagnostics are routed to `self.diagnostics`. Any error returned
/// by the tokenizer stops the dump and is propagated to the caller.
pub fn dumpBufLines(self: *Parser, buf: []const u8) Error!void {
    var tokenizer = LineTokenizer{
        .buffer = buf,
        .diagnostics = &self.diagnostics,
    };

    while (try tokenizer.next()) |line| dumpLine(line);
}
|
|
|
|
|
|
|
|
/// Debug helper: print a one-line summary of a tokenized line to stderr.
///
/// The output has the shape
///
///     <indent tag>[ (<dedent payload>)]: <contents tag> => <detail>
///
/// where `<detail>` is the comment text, the inline value
/// (`[empty]` or `<kind> => <text>`), or `<key> : <value>` for map items.
///
/// The pieces are written straight to stderr instead of being formatted into
/// fixed-size scratch buffers first, so arbitrarily long keys and values are
/// printed in full rather than hitting an `unreachable` when they do not fit.
/// Because the line is emitted with several `std.debug.print` calls, output
/// from other threads may interleave with it.
fn dumpLine(line: LineTokenizer.Line) void {
    std.debug.print("{s}", .{@tagName(line.indent)});
    // Only a dedent carries a payload that is shown next to the tag.
    if (line.indent == .dedent) {
        std.debug.print(" ({d})", .{line.indent.dedent});
    }
    std.debug.print(": {s} => ", .{@tagName(line.contents)});

    switch (line.contents) {
        .comment => |str| std.debug.print("{s}", .{str}),
        .in_line, .list_item => |scalar| switch (scalar) {
            .empty => std.debug.print("[empty]", .{}),
            .scalar,
            .string,
            .flow_list,
            .flow_map,
            => |str| std.debug.print("{s} => {s}", .{ @tagName(scalar), str }),
        },
        .map_item => |map| {
            std.debug.print("{s} : ", .{map.key});
            switch (map.val) {
                .empty => std.debug.print("[empty]", .{}),
                .scalar,
                .string,
                .flow_list,
                .flow_map,
                => |str| std.debug.print("{s} => {s}", .{ @tagName(map.val), str }),
            }
        },
    }

    std.debug.print("\n", .{});
}
|
|
|
|
};
|
2023-09-17 19:47:18 -07:00
|
|
|
|
|
|
|
pub const FlowParser = struct {
|
|
|
|
/// One level of container nesting tracked while scanning a flow sequence.
const FlowStackItem = struct {
    /// The container value being filled at this nesting level.
    value: *Value,
    /// Index into the parse buffer where the item currently being scanned
    /// begins. Lists use it for their items; maps use it for keys and values.
    item_start: usize = 0,
};
|
|
|
|
|
|
|
|
/// Stack of containers that are currently open, innermost last.
const FlowStack = std.ArrayList(FlowStackItem);

/// The flow-style text scanned by `parse`.
buffer: []const u8,
/// Top-level container of the parsed result; assigned by `parse`.
root: Value,
/// Allocator used for the stack and for the values created while parsing.
alloc: std.mem.Allocator,
/// Containers that have been opened but not yet closed.
stack: FlowStack,
/// Current scanner state. Its initial value selects whether `parse` builds a
/// list or a map at the top level.
state: ParseState,
|
|
|
|
|
|
|
|
/// States of the hand-written, character-at-a-time state machine in `parse`.
const ParseState = enum {
    /// Inside a list, before the start of the next item.
    want_list_item,
    /// Scanning the text of a list item.
    consuming_list_item,
    /// A nested container inside a list was closed; expecting `,` or `]`.
    want_list_separator,
    /// Inside a map, before the start of the next key.
    want_map_key,
    /// Scanning the text of a map key, up to the `:`.
    consuming_map_key,
    /// A key has been read; before the start of its value.
    want_map_value,
    consuming_map_value,
    want_map_separator,
    /// The outermost container has been closed.
    done,
};
|
|
|
|
|
|
|
|
/// Errors local to the flow parser, plus allocation failure.
///   - `BadState`: the container stack was not in the shape an operation needed.
///   - `BadToken`: a character was not acceptable in the current state.
const Error = std.mem.Allocator.Error || error{
    BadState,
    BadToken,
};
|
|
|
|
|
|
|
|
/// Create a flow parser that will read `buffer` as the contents of a
/// flow-style list.
///
/// `alloc` is stored and used for the container stack and for the values
/// created by `parse`. `root` stays undefined until `parse` assigns it.
///
/// The stack starts out as an empty list (which owns no memory) rather than
/// `undefined`, so calling `deinit` is safe even if `parse` never ran or
/// failed before it set up the stack.
///
/// This function currently cannot fail; the error union is kept so existing
/// `try` call sites continue to compile.
pub fn initList(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
    return .{
        .buffer = buffer,
        .root = undefined,
        .alloc = alloc,
        .stack = FlowStack.init(alloc),
        .state = .want_list_item,
    };
}
|
|
|
|
|
|
|
|
/// Create a flow parser that will read `buffer` as the contents of a
/// flow-style map.
///
/// `alloc` is stored and used for the container stack and for the values
/// created by `parse`. `root` stays undefined until `parse` assigns it.
///
/// The stack starts out as an empty list (which owns no memory) rather than
/// `undefined`, so calling `deinit` is safe even if `parse` never ran or
/// failed before it set up the stack.
///
/// This function currently cannot fail; the error union is kept so existing
/// `try` call sites continue to compile.
pub fn initMap(alloc: std.mem.Allocator, buffer: []const u8) Error!FlowParser {
    return .{
        .buffer = buffer,
        .root = undefined,
        .alloc = alloc,
        .stack = FlowStack.init(alloc),
        .state = .want_map_key,
    };
}
|
|
|
|
|
|
|
|
/// Release the memory held by the container stack.
///
/// Only the stack is freed here; `root` and the values reachable from it are
/// left untouched.
pub fn deinit(self: *FlowParser) void {
    self.stack.deinit();
}
|
|
|
|
|
|
|
|
/// Return a pointer to the innermost (last) entry of `stack`.
///
/// Returns `error.BadState` when the stack is empty. The pointer refers to the
/// stack's backing storage, so it must not be used after the stack grows.
inline fn getStackTip(stack: FlowStack) Error!*FlowStackItem {
    const entries = stack.items;
    if (entries.len == 0) return error.BadState;
    return &entries[entries.len - 1];
}
|
|
|
|
|
|
|
|
/// Record `start` as the `item_start` of the innermost entry of `stack`.
///
/// Returns `error.BadState` when the stack is empty.
inline fn setStackItemStart(stack: FlowStack, start: usize) Error!void {
    const tip = try getStackTip(stack);
    tip.item_start = start;
}
|
|
|
|
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
/// Close the innermost open container and report which state the scanner
/// should continue in.
///
/// Returns:
///   - `.done` when the container just closed was the outermost one;
///   - `.want_list_separator` when the enclosing container is a list;
///   - `.want_map_separator` when the enclosing container is a map.
///
/// Returns `error.BadState` when there is nothing to pop, or when the
/// enclosing value is neither a list nor a map.
inline fn popStack(self: *FlowParser) Parser.Error!ParseState {
    _ = self.stack.popOrNull() orelse return error.BadState;

    const remaining = self.stack.items;
    if (remaining.len == 0) return .done;

    // The state to resume in depends on what kind of container we fell back into.
    switch (remaining[remaining.len - 1].value.*) {
        .list => return .want_list_separator,
        .map => return .want_map_separator,
        else => return error.BadState,
    }
}
|
|
|
|
|
|
|
|
pub fn parse(self: *FlowParser, dupe_behavior: Parser.DuplicateKeyBehavior) Parser.Error!Value {
|
|
|
|
// prime the stack:
|
|
|
|
switch (self.state) {
|
|
|
|
.want_list_item => {
|
|
|
|
self.root = Value.newList(self.alloc);
|
|
|
|
self.stack = try FlowStack.initCapacity(self.alloc, 1);
|
|
|
|
self.stack.appendAssumeCapacity(.{ .value = &self.root });
|
|
|
|
},
|
|
|
|
.want_map_key => {
|
|
|
|
self.root = Value.newMap(self.alloc);
|
|
|
|
self.stack = try FlowStack.initCapacity(self.alloc, 1);
|
|
|
|
self.stack.appendAssumeCapacity(.{ .value = &self.root });
|
|
|
|
},
|
|
|
|
else => {
|
|
|
|
return error.BadState;
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2023-09-17 23:09:26 -07:00
|
|
|
var dangling_key: ?[]const u8 = null;
|
|
|
|
|
2023-09-17 19:47:18 -07:00
|
|
|
charloop: for (self.buffer, 0..) |char, idx| {
|
|
|
|
// std.debug.print("{s} => {c}\n", .{ @tagName(self.state), char });
|
|
|
|
switch (self.state) {
|
|
|
|
.want_list_item => switch (char) {
|
|
|
|
' ', '\t' => continue :charloop,
|
|
|
|
',' => {
|
|
|
|
// empty value
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
try tip.value.list.append(try Value.fromString(self.alloc, ""));
|
|
|
|
tip.item_start = idx + 1;
|
|
|
|
},
|
|
|
|
'{' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
|
|
|
|
const new_map = try Parser.appendListGetValue(
|
|
|
|
&tip.value.list,
|
|
|
|
Value.newMap(self.alloc),
|
|
|
|
);
|
|
|
|
|
|
|
|
tip.item_start = idx;
|
|
|
|
try self.stack.append(.{ .value = new_map });
|
|
|
|
self.state = .want_map_key;
|
|
|
|
},
|
|
|
|
'[' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
|
|
|
|
const new_list = try Parser.appendListGetValue(
|
|
|
|
&tip.value.list,
|
|
|
|
Value.newList(self.alloc),
|
|
|
|
);
|
|
|
|
|
|
|
|
tip.item_start = idx;
|
|
|
|
try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
|
|
|
|
self.state = .want_list_item;
|
|
|
|
},
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
']' => {
|
|
|
|
const finished = self.stack.getLastOrNull() orelse return error.BadState;
|
|
|
|
if (finished.value.list.items.len > 0 or idx > finished.item_start)
|
|
|
|
try finished.value.list.append(
|
|
|
|
try Parser.valueFromString(self.alloc, ""),
|
|
|
|
);
|
|
|
|
self.state = try self.popStack();
|
|
|
|
},
|
2023-09-17 19:47:18 -07:00
|
|
|
else => {
|
|
|
|
try setStackItemStart(self.stack, idx);
|
|
|
|
self.state = .consuming_list_item;
|
|
|
|
},
|
|
|
|
},
|
|
|
|
.consuming_list_item => switch (char) {
|
|
|
|
',' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
|
|
|
|
try tip.value.list.append(
|
|
|
|
try Value.fromString(self.alloc, self.buffer[tip.item_start..idx]),
|
|
|
|
);
|
|
|
|
tip.item_start = idx + 1;
|
|
|
|
|
|
|
|
self.state = .want_list_item;
|
|
|
|
},
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
']' => {
|
|
|
|
const finished = self.stack.getLastOrNull() orelse return error.BadState;
|
|
|
|
try finished.value.list.append(
|
|
|
|
try Parser.valueFromString(
|
|
|
|
self.alloc,
|
|
|
|
self.buffer[finished.item_start..idx],
|
|
|
|
),
|
|
|
|
);
|
|
|
|
self.state = try self.popStack();
|
|
|
|
},
|
2023-09-17 19:47:18 -07:00
|
|
|
else => continue :charloop,
|
|
|
|
},
|
|
|
|
.want_list_separator => switch (char) {
|
|
|
|
' ', '\t' => continue :charloop,
|
|
|
|
',' => {
|
|
|
|
try setStackItemStart(self.stack, idx);
|
|
|
|
self.state = .want_list_item;
|
|
|
|
},
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
']' => self.state = try self.popStack(),
|
2023-09-17 19:47:18 -07:00
|
|
|
else => return error.BadToken,
|
|
|
|
},
|
|
|
|
.want_map_key => switch (char) {
|
|
|
|
' ', '\t' => continue :charloop,
|
|
|
|
// forbid these characters so that flow dictionary keys cannot start
|
|
|
|
// with characters that regular dictionary keys cannot start with
|
|
|
|
// (even though they're unambiguous in this specific context).
|
|
|
|
'{', '[', '#', '>', '|', ',' => return error.BadToken,
|
|
|
|
':' => {
|
|
|
|
// we have an empty map key
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = "";
|
2023-09-17 19:47:18 -07:00
|
|
|
self.state = .want_map_value;
|
|
|
|
},
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map are much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
'}' => self.state = try self.popStack(),
|
2023-09-17 19:47:18 -07:00
|
|
|
else => {
|
|
|
|
try setStackItemStart(self.stack, idx);
|
|
|
|
self.state = .consuming_map_key;
|
|
|
|
},
|
|
|
|
},
|
|
|
|
.consuming_map_key => switch (char) {
|
|
|
|
':' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = self.buffer[tip.item_start..idx];
|
2023-09-17 19:47:18 -07:00
|
|
|
|
|
|
|
self.state = .want_map_value;
|
|
|
|
},
|
|
|
|
else => continue :charloop,
|
|
|
|
},
|
|
|
|
.want_map_value => switch (char) {
|
|
|
|
' ', '\t' => continue :charloop,
|
|
|
|
',' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
try Parser.putMap(
|
|
|
|
&tip.value.map,
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key.?,
|
2023-09-17 19:47:18 -07:00
|
|
|
try Parser.valueFromString(self.alloc, ""),
|
|
|
|
dupe_behavior,
|
|
|
|
);
|
|
|
|
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = null;
|
2023-09-17 19:47:18 -07:00
|
|
|
self.state = .want_map_key;
|
|
|
|
},
|
|
|
|
'[' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
|
|
|
|
const new_list = try Parser.putMapGetValue(
|
|
|
|
&tip.value.map,
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key.?,
|
2023-09-17 19:47:18 -07:00
|
|
|
Value.newList(self.alloc),
|
|
|
|
dupe_behavior,
|
|
|
|
);
|
|
|
|
|
|
|
|
try self.stack.append(.{ .value = new_list, .item_start = idx + 1 });
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = null;
|
2023-09-17 19:47:18 -07:00
|
|
|
self.state = .want_list_item;
|
|
|
|
},
|
|
|
|
'{' => {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
|
|
|
|
const new_map = try Parser.putMapGetValue(
|
|
|
|
&tip.value.map,
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key.?,
|
2023-09-17 19:47:18 -07:00
|
|
|
Value.newMap(self.alloc),
|
|
|
|
dupe_behavior,
|
|
|
|
);
|
|
|
|
|
|
|
|
try self.stack.append(.{ .value = new_map });
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = null;
|
2023-09-17 19:47:18 -07:00
|
|
|
self.state = .want_map_key;
|
|
|
|
},
|
|
|
|
'}' => {
|
|
|
|
// the value is an empty string and this map is closed
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
try Parser.putMap(
|
|
|
|
&tip.value.map,
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key.?,
|
2023-09-17 19:47:18 -07:00
|
|
|
try Parser.valueFromString(self.alloc, ""),
|
|
|
|
dupe_behavior,
|
|
|
|
);
|
|
|
|
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = null;
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top-level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map is much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
self.state = try self.popStack();
|
2023-09-17 19:47:18 -07:00
|
|
|
},
|
|
|
|
else => {
|
|
|
|
try setStackItemStart(self.stack, idx);
|
|
|
|
self.state = .consuming_map_value;
|
|
|
|
},
|
|
|
|
},
|
|
|
|
.consuming_map_value => switch (char) {
|
|
|
|
',', '}' => |term| {
|
|
|
|
const tip = try getStackTip(self.stack);
|
|
|
|
try Parser.putMap(
|
|
|
|
&tip.value.map,
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key.?,
|
2023-09-17 19:47:18 -07:00
|
|
|
try Parser.valueFromString(self.alloc, self.buffer[tip.item_start..idx]),
|
|
|
|
dupe_behavior,
|
|
|
|
);
|
2023-09-17 23:09:26 -07:00
|
|
|
dangling_key = null;
|
2023-09-17 19:47:18 -07:00
|
|
|
self.state = .want_map_key;
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top-level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map is much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
if (term == '}') self.state = try self.popStack();
|
2023-09-17 19:47:18 -07:00
|
|
|
},
|
|
|
|
else => continue :charloop,
|
|
|
|
},
|
|
|
|
.want_map_separator => switch (char) {
|
|
|
|
' ', '\t' => continue :charloop,
|
|
|
|
',' => self.state = .want_map_key,
|
config: start doing some code cleanup
I was pretty sloppy with the code organization while writing out the
state machines because my focus was on thinking through the parsing
process and logic there. However, the code was not in good shape to
continue implementing code features (not document features). This is
the first of probably several commits that will work on cleaning up
some things.
Value has been promoted to the top-level namespace, and Document has an
initializer function. Referencing Value.List and Value.Map is much
cleaner now. Type aliases are good.
For the flow parser, `popStack` does not have to access anything except
the current stack. This can be passed in as a parameter. This means
that `parse` is ready to be refactored to take a buffer and an
allocator.
The main next steps for code improvement are:
1. reentrant/streaming parser. I am planning to leave it as
line-buffered, though I could go further. Line-buffered has two main
benefits: the tokenizer doesn't need to be refactored significantly,
and the flow parser doesn't need to be made reentrant. I may
reevaluate this as I am implementing it, however, as those changes
may be simpler than I think.
2. Actually implement the error diagnostics info. I have some skeleton
structure in place for this, so it should just be doing the work of
getting it hooked up.
3. Parse into object. Metaprogramming, let's go. It will be interesting
to try to do this non-recursively, as well (curious to see if it
results in code bloat).
4. Object to Document. This is probably going to be annoying, since
there are a variety of edge cases that will have to be handled. And
lots of objects that cannot be represented as documents.
5. Serialize Document. One thing the parser does not preserve is
whether a Value was flow-style or not, so it will be impossible to
do round-trip formatting preservation. That's currently a non-goal,
and I haven't decided yet if flow-style output should be based on
some heuristic (number/length of values in container) or just never
emitted. Lack of round-trip preservation does make using this as a
general purpose config format a lot more dubious, so I will have to
think about this some more.
6. Document to JSON. Why not? I will hand roll this and it will suck.
And then everything will be perfect and never need to be touched again.
2023-09-18 00:01:36 -07:00
|
|
|
'}' => self.state = try self.popStack(),
|
2023-09-17 19:47:18 -07:00
|
|
|
else => return error.BadToken,
|
|
|
|
},
|
|
|
|
// the root value was closed but there are characters remaining
|
|
|
|
// in the buffer
|
|
|
|
.done => return error.BadState,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// we ran out of characters while still in the middle of an object
|
|
|
|
if (self.state != .done) return error.BadState;
|
|
|
|
|
|
|
|
return self.root;
|
|
|
|
}
|
|
|
|
};
|