all: do some restructuring
I don't like big monolithic source files, so let's restructure a bit. parser.zig is still bigger than I would like it to be, but there isn't a good way to break up the two state machine parsers, which take up most of the space. This is the last junk commit before I am seriously going to implement the "streaming" parser. Which is the last change before implementing deserialization to object. I am definitely not just spinning my wheels here.
This commit is contained in:
parent
8684fab23c
commit
38e47b39dc
@ -4,7 +4,7 @@ pub fn build(b: *std.Build) void {
|
|||||||
const target = b.standardTargetOptions(.{});
|
const target = b.standardTargetOptions(.{});
|
||||||
|
|
||||||
const nice = b.addModule("nice", .{
|
const nice = b.addModule("nice", .{
|
||||||
.source_file = .{ .path = "src/config.zig" },
|
.source_file = .{ .path = "src/nice.zig" },
|
||||||
});
|
});
|
||||||
|
|
||||||
add_examples(b, .{
|
add_examples(b, .{
|
||||||
|
105
src/linebuffer.zig
Normal file
105
src/linebuffer.zig
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
pub const IndexSlice = struct { start: usize, len: usize };

/// A growable, heap-backed buffer that accumulates fed bytes and hands them
/// back one LF-terminated line at a time.
///
/// The unread bytes (the "scan window") are tracked by `internal`; `used` is
/// the index one past the last valid byte in the allocation, i.e. where the
/// next `feed` appends.
pub const LineBuffer = struct {
    allocator: std.mem.Allocator,
    internal: FixedLineBuffer,
    used: usize,

    pub const default_capacity: usize = 4096;
    pub const Error = std.mem.Allocator.Error;

    /// Create a buffer with `default_capacity` bytes of backing storage.
    /// Release it with `deinit`.
    pub fn init(allocator: std.mem.Allocator) Error!LineBuffer {
        return initCapacity(allocator, default_capacity);
    }

    /// Create a buffer with `capacity` bytes of backing storage allocated
    /// from `allocator`. Release it with `deinit`.
    pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) Error!LineBuffer {
        return .{
            .allocator = allocator,
            .internal = .{
                .buffer = try allocator.alloc(u8, capacity),
                .window = .{ .start = 0, .len = 0 },
            },
            .used = 0,
        };
    }

    /// Free the backing storage. Slices previously returned by `nextLine`
    /// are invalid afterwards.
    pub fn deinit(self: LineBuffer) void {
        self.allocator.free(self.internal.buffer);
    }

    /// Append `data` to the unread bytes, growing or compacting the backing
    /// storage as needed. Invalidates slices previously returned by
    /// `nextLine`. On error the buffer is left unchanged.
    pub fn feed(self: *LineBuffer, data: []const u8) Error!void {
        if (data.len == 0) return;

        // A window that does not fit in usize cannot be allocated either, so
        // report overflow as an allocation failure.
        const new_window_len = std.math.add(usize, self.internal.window.len, data.len) catch
            return error.OutOfMemory;

        if (new_window_len > self.internal.buffer.len) {
            // data cannot fit in the buffer alongside the scan window, so the
            // allocation has to grow. The result must be stored: realloc may
            // move the memory.
            // TODO: adopt an overallocation strategy? Will potentially avoid
            //       allocating on every invocation but will cause the buffer
            //       to oversize
            self.internal.buffer = try self.allocator.realloc(self.storage(), new_window_len);
            self.rehome();
        } else if (new_window_len > self.internal.buffer.len - self.internal.window.start) {
            // data will fit, but only once the window is moved to the front
            self.rehome();
        }
        // otherwise data can simply be appended after the existing bytes

        @memcpy(self.storage()[self.used..][0..data.len], data);
        self.used += data.len;
        self.internal.window.len = new_window_len;
    }

    /// The memory returned by this function is valid until the next call to `feed`.
    /// The resulting slice does not include the newline character.
    pub fn nextLine(self: *LineBuffer) ?[]const u8 {
        return self.internal.nextLine();
    }

    // Move the current scan window to the beginning of the buffer so that the
    // space in front of it can be reused, then point `used` at the window end.
    fn rehome(self: *LineBuffer) void {
        const window = self.internal.window;
        if (window.start != 0) {
            const buf = self.storage();
            // The destination always starts before the source, so a forward
            // copy is correct even when the two ranges overlap.
            std.mem.copyForwards(u8, buf[0..window.len], buf[window.start..][0..window.len]);
            self.internal.window.start = 0;
        }
        self.used = window.len;
    }

    // Mutable view of the backing storage. `internal.buffer` is typed const
    // because FixedLineBuffer also wraps caller-provided read-only data; the
    // cast is sound here because this struct allocated the memory itself.
    fn storage(self: *LineBuffer) []u8 {
        return @constCast(self.internal.buffer);
    }
};

/// A line reader over a fixed, caller-provided byte slice. It never writes to
/// or frees `buffer`; `window` tracks the bytes not yet returned.
pub const FixedLineBuffer = struct {
    buffer: []const u8,
    window: IndexSlice,

    /// Wrap `data`, with the scan window covering all of it.
    pub fn init(data: []const u8) FixedLineBuffer {
        return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } };
    }

    /// Return the next line (without its '\n') and advance the window past
    /// it. Returns null when the window is empty or holds no '\n'; trailing
    /// bytes without a newline are left in the window.
    pub fn nextLine(self: *FixedLineBuffer) ?[]const u8 {
        if (self.window.start >= self.buffer.len or self.window.len == 0)
            return null;

        const window = self.buffer[self.window.start..][0..self.window.len];
        const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;

        // skip the line and its newline
        self.window.start += split + 1;
        self.window.len -= split + 1;

        return window[0..split];
    }
};
|
67
src/nice.zig
Normal file
67
src/nice.zig
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
|
||||||
|
//
|
||||||
|
// - Doesn't support multiline keys (this means map keys cannot start with
|
||||||
|
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
|
||||||
|
// - Allows using tabs for indentation (but not mixed tabs/spaces)
|
||||||
|
// - Indentation must be quantized consistently throughout the document. e.g.
|
||||||
|
// every nested layer being exactly 2 spaces past its parent. Tabs may
|
||||||
|
// only use one tab per indentation level.
|
||||||
|
// - Allows flow-style lists, maps, and strings on the same line as map keys or
|
||||||
|
// list items (i.e. the following are legal):
|
||||||
|
//
|
||||||
|
// key: {inline: map}
|
||||||
|
// key: [inline, list]
|
||||||
|
// key: > inline string
|
||||||
|
// - {map: item}
|
||||||
|
// - [list, item]
|
||||||
|
// - > inline string
|
||||||
|
//
|
||||||
|
// The string case retains the possibility of having an inline map value starting
|
||||||
|
// with {, [, or >
|
||||||
|
// - map keys and list item dashes must be followed by a value or an indented
|
||||||
|
// section to reduce parser quantum state. This means that
|
||||||
|
//
|
||||||
|
// foo:
|
||||||
|
// bar: baz
|
||||||
|
//
|
||||||
|
// or
|
||||||
|
//
|
||||||
|
// -
|
||||||
|
// - qux
|
||||||
|
//
|
||||||
|
// are not valid. This can be represented with an inline empty string after foo:
|
||||||
|
//
|
||||||
|
// foo: >
|
||||||
|
// bar: baz
|
||||||
|
//
|
||||||
|
// or
|
||||||
|
//
|
||||||
|
// - >
|
||||||
|
// - qux
|
||||||
|
//
|
||||||
|
// - newlines are strictly LF, if the parser finds CR, it is an error
|
||||||
|
// - blank lines may not contain any whitespace characters except the single LF
|
||||||
|
// - Additional string indicator `|` for soft-wrapped strings, i.e.
|
||||||
|
//
|
||||||
|
// key: | this is not special
|
||||||
|
// key:
|
||||||
|
// | these lines are
|
||||||
|
// | soft-wrapped
|
||||||
|
//
|
||||||
|
// soft-wrapped lines are joined with a ' ' instead of a newline character.
|
||||||
|
// Like multiline strings, the final space is stripped (I guess this is a very
|
||||||
|
// janky way to add trailing whitespace to a string).
|
||||||
|
//
|
||||||
|
// - terminated strings to allow trailing whitespace:
|
||||||
|
// | this string has trailing whitespace |
|
||||||
|
// > and so does this one |
|
||||||
|
// - The parser is both strict and probably sloppy and may have weird edge
|
||||||
|
// cases since I'm slinging code, not writing a spec. For example, tabs are
|
||||||
|
// not trimmed from the values of inline lists/maps
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
pub const buffers = @import("./linebuffer.zig");
|
||||||
|
pub const tokenizer = @import("./tokenizer.zig");
|
||||||
|
pub const parser = @import("./parser.zig");
|
||||||
|
pub const Parser = parser.Parser;
|
@ -1,69 +1,8 @@
|
|||||||
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
|
|
||||||
//
|
|
||||||
// - Doesn't support multiline keys (this means map keys cannot start with
|
|
||||||
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
|
|
||||||
// - Allows using tabs for indentation (but not mixed tabs/spaces)
|
|
||||||
// - Indentation must be quantized consistently throughout the document. e.g.
|
|
||||||
// every nested layer being exactly 2 spaces past its parent. Tabs may
|
|
||||||
// only use one tab per indentation level.
|
|
||||||
// - Allows flow-style lists, maps, and strings on the same line as map keys or
|
|
||||||
// list items (i.e. the following are legal):
|
|
||||||
//
|
|
||||||
// key: {inline: map}
|
|
||||||
// key: [inline, list]
|
|
||||||
// key: > inline string
|
|
||||||
// - {map: item}
|
|
||||||
// - [list, item]
|
|
||||||
// - > inline string
|
|
||||||
//
|
|
||||||
// The string case retains the possibility of having an inline map value starting
|
|
||||||
// with {, [, or >
|
|
||||||
// - inline lists and maps cannot contain other inline structures. This may
|
|
||||||
// change, as writing {:[{:[{:[{:[{:[{:[]}]}]}]}]}]} seems tremendously useful
|
|
||||||
// - a map keys and list item dashes must be followed by a value or an indented
|
|
||||||
// section to reduce parser quantum state. This means that
|
|
||||||
//
|
|
||||||
// foo:
|
|
||||||
// bar: baz
|
|
||||||
//
|
|
||||||
// or
|
|
||||||
//
|
|
||||||
// -
|
|
||||||
// - qux
|
|
||||||
//
|
|
||||||
// are not valid. This can be represented with an inline empty string after foo:
|
|
||||||
//
|
|
||||||
// foo: >
|
|
||||||
// bar: baz
|
|
||||||
//
|
|
||||||
// or
|
|
||||||
//
|
|
||||||
// - >
|
|
||||||
// - qux
|
|
||||||
//
|
|
||||||
// - newlines are strictly LF, if the parser finds CR, it is an error
|
|
||||||
// - blank lines may not contain any whitespace characters except the single LF
|
|
||||||
// - Additional string indicator `|` for soft-wrapped strings, i.e.
|
|
||||||
//
|
|
||||||
// key: | this is not special
|
|
||||||
// key:
|
|
||||||
// | these lines are
|
|
||||||
// | soft-wrapped
|
|
||||||
//
|
|
||||||
// soft-wrapped lines are joined with a ' ' instead of a newline character.
|
|
||||||
// Like multiline strings, the final space is stripped (I guess this is a very
|
|
||||||
// janky way to add trailing whitespace to a string).
|
|
||||||
//
|
|
||||||
// - terminated strings to allow trailing whitespace:
|
|
||||||
// | this string has trailing whitespace |
|
|
||||||
// > and so does this one |
|
|
||||||
// - The parser is both strict and probably sloppy and may have weird edge
|
|
||||||
// cases since I'm slinging code, not writing a spec. For example, tabs are
|
|
||||||
// not trimmed from the values of inline lists/maps
|
|
||||||
|
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
pub const IndexSlice = struct { start: usize, len: usize };
|
const buffers = @import("./linebuffer.zig");
|
||||||
|
const tokenizer = @import("./tokenizer.zig");
|
||||||
|
const Value = @import("./parser/value.zig").Value;
|
||||||
|
|
||||||
pub const Diagnostics = struct {
|
pub const Diagnostics = struct {
|
||||||
row: usize,
|
row: usize,
|
||||||
@ -71,495 +10,7 @@ pub const Diagnostics = struct {
|
|||||||
message: []const u8,
|
message: []const u8,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const LineBuffer = struct {
|
pub const Error = error{
|
||||||
allocator: std.mem.Allocator,
|
|
||||||
buffer: []u8,
|
|
||||||
used: usize,
|
|
||||||
window: IndexSlice,
|
|
||||||
|
|
||||||
pub const default_capacity: usize = 4096;
|
|
||||||
pub const Error = std.mem.Allocator.Error;
|
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator) Error!LineBuffer {
|
|
||||||
return initCapacity(allocator, default_capacity);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) Error!LineBuffer {
|
|
||||||
return .{
|
|
||||||
.allocator = allocator,
|
|
||||||
.buffer = try allocator.alloc(u8, capacity),
|
|
||||||
.used = 0,
|
|
||||||
.window = .{ .start = 0, .len = 0 },
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn feed(self: *LineBuffer, data: []const u8) Error!void {
|
|
||||||
if (data.len == 0) return;
|
|
||||||
// TODO: check for usize overflow here if we want Maximum Robustness
|
|
||||||
const new_window_len = self.window.len + data.len;
|
|
||||||
|
|
||||||
// data cannot fit in the buffer with our scan window, so we have to realloc
|
|
||||||
if (new_window_len > self.buffer.len) {
|
|
||||||
// TODO: adopt an overallocation strategy? Will potentially avoid allocating
|
|
||||||
// on every invocation but will cause the buffer to oversize
|
|
||||||
try self.allocator.realloc(self.buffer, new_window_len);
|
|
||||||
self.rehome();
|
|
||||||
@memcpy(self.buffer[self.used..].ptr, data);
|
|
||||||
self.used = new_window_len;
|
|
||||||
self.window.len = new_window_len;
|
|
||||||
}
|
|
||||||
// data will fit, but needs to be moved in the buffer
|
|
||||||
else if (self.window.start + new_window_len > self.buffer.len) {
|
|
||||||
self.rehome();
|
|
||||||
@memcpy(self.buffer[self.used..].ptr, data);
|
|
||||||
self.used = new_window_len;
|
|
||||||
self.window.len = new_window_len;
|
|
||||||
}
|
|
||||||
// data can simply be appended
|
|
||||||
else {
|
|
||||||
@memcpy(self.buffer[self.used..].ptr, data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The memory returned by this function is valid until the next call to `feed`.
|
|
||||||
/// The resulting slice does not include the newline character.
|
|
||||||
pub fn nextLine(self: *LineBuffer) ?[]const u8 {
|
|
||||||
if (self.window.start >= self.buffer.len or self.window.len == 0)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
const window = self.buffer[self.window.start..][0..self.window.len];
|
|
||||||
const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;
|
|
||||||
|
|
||||||
self.window.start += split + 1;
|
|
||||||
self.window.len -= split + 1;
|
|
||||||
|
|
||||||
return window[0..split];
|
|
||||||
}
|
|
||||||
|
|
||||||
fn rehome(self: *LineBuffer) void {
|
|
||||||
if (self.window.start == 0) return;
|
|
||||||
|
|
||||||
const window = self.buffer[self.window.start..][0..self.window.len];
|
|
||||||
|
|
||||||
if (self.window.len > self.window.start)
|
|
||||||
std.mem.copyForwards(u8, self.buffer, window)
|
|
||||||
else
|
|
||||||
@memcpy(self.buffer.ptr, window);
|
|
||||||
|
|
||||||
self.window.start = 0;
|
|
||||||
self.used = window.len;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const FixedLineBuffer = struct {
|
|
||||||
buffer: []const u8,
|
|
||||||
window: IndexSlice,
|
|
||||||
|
|
||||||
pub fn init(data: []const u8) FixedLineBuffer {
|
|
||||||
return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn nextLine(self: *FixedLineBuffer) ?[]const u8 {
|
|
||||||
if (self.window.start >= self.buffer.len or self.window.len == 0)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
const window = self.buffer[self.window.start..][0..self.window.len];
|
|
||||||
const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;
|
|
||||||
|
|
||||||
self.window.start += split + 1;
|
|
||||||
self.window.len -= split + 1;
|
|
||||||
|
|
||||||
return window[0..split];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const IndentationType = union(enum) {
|
|
||||||
immaterial: void,
|
|
||||||
spaces: usize,
|
|
||||||
tabs: void,
|
|
||||||
};
|
|
||||||
|
|
||||||
const InlineItem = union(enum) {
|
|
||||||
empty: void,
|
|
||||||
scalar: []const u8,
|
|
||||||
line_string: []const u8,
|
|
||||||
space_string: []const u8,
|
|
||||||
|
|
||||||
flow_list: []const u8,
|
|
||||||
flow_map: []const u8,
|
|
||||||
|
|
||||||
fn lineEnding(self: InlineItem) u8 {
|
|
||||||
return switch (self) {
|
|
||||||
.line_string => '\n',
|
|
||||||
.space_string => ' ',
|
|
||||||
else => unreachable,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const LineContents = union(enum) {
|
|
||||||
comment: []const u8,
|
|
||||||
|
|
||||||
in_line: InlineItem,
|
|
||||||
list_item: InlineItem,
|
|
||||||
map_item: struct { key: []const u8, val: InlineItem },
|
|
||||||
};
|
|
||||||
|
|
||||||
// we can dedent multiple levels at once. Example:
|
|
||||||
//
|
|
||||||
// foo:
|
|
||||||
// bar:
|
|
||||||
// > a
|
|
||||||
// > string
|
|
||||||
// baz: [qux]
|
|
||||||
//
|
|
||||||
// capturing this is conceptually simple, but implementing it without complex
|
|
||||||
// indentation tracking requires quantizing the indentation. This means our
|
|
||||||
// IndentationType will also need to track the number of spaces used for
|
|
||||||
// indentation, as detected. Then every line we have to check indent rem the
|
|
||||||
// quantization level == 0 (otherwise we broke quantization) and compute indent
|
|
||||||
// div the quantization level to give us our effective indentation level.
|
|
||||||
|
|
||||||
const ShiftDirection = enum { indent, dedent, none };
|
|
||||||
const RelativeIndent = union(ShiftDirection) {
|
|
||||||
indent: void,
|
|
||||||
dedent: usize,
|
|
||||||
none: void,
|
|
||||||
};
|
|
||||||
|
|
||||||
const Line = struct {
|
|
||||||
indent: RelativeIndent,
|
|
||||||
contents: LineContents,
|
|
||||||
raw: []const u8,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn LineTokenizer(comptime Buffer: type) type {
|
|
||||||
return struct {
|
|
||||||
buffer: Buffer,
|
|
||||||
index: usize = 0,
|
|
||||||
indentation: IndentationType = .immaterial,
|
|
||||||
last_indent: usize = 0,
|
|
||||||
diagnostics: *Diagnostics,
|
|
||||||
row: usize = 0,
|
|
||||||
|
|
||||||
const Error = error{
|
|
||||||
BadToken,
|
|
||||||
MixedIndentation,
|
|
||||||
UnquantizedIndentation,
|
|
||||||
TooMuchIndentation,
|
|
||||||
MissingNewline,
|
|
||||||
TrailingWhitespace,
|
|
||||||
Impossible,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn next(self: *@This()) Error!?Line {
|
|
||||||
lineloop: while (self.buffer.nextLine()) |raw_line| {
|
|
||||||
var indent: usize = 0;
|
|
||||||
for (raw_line, 0..) |char, idx| {
|
|
||||||
switch (char) {
|
|
||||||
' ' => {
|
|
||||||
switch (self.indentation) {
|
|
||||||
// There's a weird coupling here because we can't set this until
|
|
||||||
// all spaces have been consumed. I also thought about ignoring
|
|
||||||
// spaces on comment lines since those don't affect the
|
|
||||||
// relative indent/dedent, but then we would allow comments
|
|
||||||
// to ignore our indent quantum, which I dislike due to it making
|
|
||||||
// ugly documents.
|
|
||||||
.immaterial => self.indentation = .{ .spaces = 0 },
|
|
||||||
.spaces => {},
|
|
||||||
.tabs => return error.MixedIndentation,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'\t' => {
|
|
||||||
switch (self.indentation) {
|
|
||||||
.immaterial => self.indentation = .tabs,
|
|
||||||
.spaces => return error.MixedIndentation,
|
|
||||||
.tabs => {},
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'\r' => {
|
|
||||||
return error.BadToken;
|
|
||||||
},
|
|
||||||
else => {
|
|
||||||
indent = idx;
|
|
||||||
break;
|
|
||||||
},
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (raw_line.len > 0) return error.TrailingWhitespace;
|
|
||||||
continue :lineloop;
|
|
||||||
}
|
|
||||||
|
|
||||||
var quantized: usize = if (self.indentation == .spaces) quant: {
|
|
||||||
if (self.indentation.spaces == 0) {
|
|
||||||
self.indentation.spaces = indent;
|
|
||||||
}
|
|
||||||
if (@rem(indent, self.indentation.spaces) != 0)
|
|
||||||
return error.UnquantizedIndentation;
|
|
||||||
|
|
||||||
break :quant @divExact(indent, self.indentation.spaces);
|
|
||||||
} else indent;
|
|
||||||
|
|
||||||
const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
|
|
||||||
if ((quantized - self.last_indent) > 1)
|
|
||||||
return error.TooMuchIndentation;
|
|
||||||
break :rel .indent;
|
|
||||||
} else if (quantized < self.last_indent)
|
|
||||||
.{ .dedent = self.last_indent - quantized }
|
|
||||||
else
|
|
||||||
.none;
|
|
||||||
|
|
||||||
defer {
|
|
||||||
self.row += 1;
|
|
||||||
self.last_indent = quantized;
|
|
||||||
}
|
|
||||||
|
|
||||||
const line = raw_line[indent..];
|
|
||||||
|
|
||||||
// this should not be possible, as empty lines are caught earlier.
|
|
||||||
if (line.len == 0) return error.Impossible;
|
|
||||||
|
|
||||||
switch (line[0]) {
|
|
||||||
'#' => {
|
|
||||||
// simply lie about indentation when the line is a comment.
|
|
||||||
quantized = self.last_indent;
|
|
||||||
return .{
|
|
||||||
.indent = .none,
|
|
||||||
.contents = .{ .comment = line[1..] },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
'|', '>', '[', '{' => {
|
|
||||||
return .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .in_line = try detectInlineItem(line) },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
'-' => {
|
|
||||||
if (line.len > 1 and line[1] != ' ') return error.BadToken;
|
|
||||||
|
|
||||||
return if (line.len == 1) .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .list_item = .empty },
|
|
||||||
.raw = line,
|
|
||||||
} else .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .list_item = try detectInlineItem(line[2..]) },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
else => {
|
|
||||||
for (line, 0..) |char, idx| {
|
|
||||||
if (char == ':') {
|
|
||||||
if (idx + 1 == line.len) return .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (line[idx + 1] != ' ') return error.BadToken;
|
|
||||||
|
|
||||||
return .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .map_item = .{
|
|
||||||
.key = line[0..idx],
|
|
||||||
.val = try detectInlineItem(line[idx + 2 ..]),
|
|
||||||
} },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return .{
|
|
||||||
.indent = relative,
|
|
||||||
.contents = .{ .in_line = .{ .scalar = line } },
|
|
||||||
.raw = line,
|
|
||||||
};
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// somehow everything else has failed
|
|
||||||
return error.Impossible;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn detectInlineItem(buf: []const u8) Error!InlineItem {
|
|
||||||
if (buf.len == 0) return .empty;
|
|
||||||
|
|
||||||
switch (buf[0]) {
|
|
||||||
'>', '|' => |char| {
|
|
||||||
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
|
||||||
|
|
||||||
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
|
||||||
' ', '\t' => return error.TrailingWhitespace,
|
|
||||||
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
|
||||||
else => buf[@min(2, buf.len)..buf.len],
|
|
||||||
};
|
|
||||||
|
|
||||||
return if (char == '>')
|
|
||||||
.{ .line_string = slice }
|
|
||||||
else
|
|
||||||
.{ .space_string = slice };
|
|
||||||
},
|
|
||||||
'[' => {
|
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != ']')
|
|
||||||
return error.BadToken;
|
|
||||||
|
|
||||||
// keep the closing ] for the flow parser
|
|
||||||
return .{ .flow_list = buf[1..] };
|
|
||||||
},
|
|
||||||
'{' => {
|
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != '}')
|
|
||||||
return error.BadToken;
|
|
||||||
|
|
||||||
// keep the closing } fpr the flow parser
|
|
||||||
return .{ .flow_map = buf[1..] };
|
|
||||||
},
|
|
||||||
else => {
|
|
||||||
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
|
|
||||||
return error.TrailingWhitespace;
|
|
||||||
|
|
||||||
return .{ .scalar = buf };
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub const Value = union(enum) {
|
|
||||||
pub const String = std.ArrayList(u8);
|
|
||||||
pub const Map = std.StringArrayHashMap(Value);
|
|
||||||
pub const List = std.ArrayList(Value);
|
|
||||||
pub const TagType = @typeInfo(Value).Union.tag_type.?;
|
|
||||||
|
|
||||||
scalar: String,
|
|
||||||
string: String,
|
|
||||||
list: List,
|
|
||||||
flow_list: List,
|
|
||||||
map: Map,
|
|
||||||
flow_map: Map,
|
|
||||||
|
|
||||||
pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) !Value {
|
|
||||||
return try _fromScalarOrString(alloc, .scalar, input);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) !Value {
|
|
||||||
return try _fromScalarOrString(alloc, .string, input);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
|
|
||||||
var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
|
|
||||||
@field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newScalar(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .scalar = String.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newString(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .string = String.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newList(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .list = List.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .flow_list = List.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newMap(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .map = Map.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
|
|
||||||
return .{ .flow_map = Map.init(alloc) };
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn printDebug(self: Value) void {
|
|
||||||
self.printRecursive(0);
|
|
||||||
std.debug.print("\n", .{});
|
|
||||||
}
|
|
||||||
|
|
||||||
fn printRecursive(self: Value, indent: usize) void {
|
|
||||||
switch (self) {
|
|
||||||
.scalar, .string => |str| {
|
|
||||||
if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
|
|
||||||
var lines = std.mem.splitScalar(u8, str.items, '\n');
|
|
||||||
std.debug.print("\n", .{});
|
|
||||||
while (lines.next()) |line| {
|
|
||||||
std.debug.print(
|
|
||||||
"{[empty]s: >[indent]}{[line]s}{[nl]s}",
|
|
||||||
.{
|
|
||||||
.empty = "",
|
|
||||||
.indent = indent,
|
|
||||||
.line = line,
|
|
||||||
.nl = if (lines.peek() == null) "" else "\n",
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
std.debug.print("{s}", .{str.items});
|
|
||||||
}
|
|
||||||
},
|
|
||||||
.list, .flow_list => |list| {
|
|
||||||
if (list.items.len == 0) {
|
|
||||||
std.debug.print("[]", .{});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std.debug.print("[\n", .{});
|
|
||||||
for (list.items, 0..) |value, idx| {
|
|
||||||
std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
|
|
||||||
value.printRecursive(indent + 2);
|
|
||||||
std.debug.print(",\n", .{});
|
|
||||||
}
|
|
||||||
std.debug.print(
|
|
||||||
"{[empty]s: >[indent]}]",
|
|
||||||
.{ .empty = "", .indent = indent },
|
|
||||||
);
|
|
||||||
},
|
|
||||||
.map, .flow_map => |map| {
|
|
||||||
if (map.count() == 0) {
|
|
||||||
std.debug.print("{{}}", .{});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std.debug.print("{{\n", .{});
|
|
||||||
|
|
||||||
var iter = map.iterator();
|
|
||||||
|
|
||||||
while (iter.next()) |entry| {
|
|
||||||
std.debug.print(
|
|
||||||
"{[empty]s: >[indent]}{[key]s}: ",
|
|
||||||
.{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* },
|
|
||||||
);
|
|
||||||
entry.value_ptr.printRecursive(indent + 4);
|
|
||||||
std.debug.print(",\n", .{});
|
|
||||||
}
|
|
||||||
std.debug.print(
|
|
||||||
"{[empty]s: >[indent]}}}",
|
|
||||||
.{ .empty = "", .indent = indent },
|
|
||||||
);
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const Parser = struct {
|
|
||||||
allocator: std.mem.Allocator,
|
|
||||||
dupe_behavior: DuplicateKeyBehavior = .fail,
|
|
||||||
default_object: DefaultObject = .fail,
|
|
||||||
diagnostics: Diagnostics = .{
|
|
||||||
.row = 0,
|
|
||||||
.span = .{ .absolute = 0, .line_offset = 0, .length = 0 },
|
|
||||||
.message = "all is well",
|
|
||||||
},
|
|
||||||
|
|
||||||
pub const Error = error{
|
|
||||||
UnexpectedIndent,
|
UnexpectedIndent,
|
||||||
UnexpectedValue,
|
UnexpectedValue,
|
||||||
ExtraContent,
|
ExtraContent,
|
||||||
@ -569,28 +20,25 @@ pub const Parser = struct {
|
|||||||
BadState,
|
BadState,
|
||||||
BadToken,
|
BadToken,
|
||||||
Fail,
|
Fail,
|
||||||
} || LineTokenizer(FixedLineBuffer).Error || std.mem.Allocator.Error;
|
} || tokenizer.Error || std.mem.Allocator.Error;
|
||||||
|
|
||||||
pub const DuplicateKeyBehavior = enum {
|
pub const DuplicateKeyBehavior = enum {
|
||||||
use_first,
|
use_first,
|
||||||
use_last,
|
use_last,
|
||||||
fail,
|
fail,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const DefaultObject = enum {
|
pub const DefaultObject = enum {
|
||||||
|
scalar,
|
||||||
string,
|
string,
|
||||||
list,
|
list,
|
||||||
map,
|
map,
|
||||||
fail,
|
fail,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const ParseState = enum {
|
const ParseState = enum { initial, value, done };
|
||||||
initial,
|
|
||||||
value,
|
|
||||||
done,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const Document = struct {
|
pub const Document = struct {
|
||||||
arena: std.heap.ArenaAllocator,
|
arena: std.heap.ArenaAllocator,
|
||||||
root: Value,
|
root: Value,
|
||||||
|
|
||||||
@ -608,15 +56,25 @@ pub const Parser = struct {
|
|||||||
pub fn deinit(self: Document) void {
|
pub fn deinit(self: Document) void {
|
||||||
self.arena.deinit();
|
self.arena.deinit();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const Parser = struct {
|
||||||
|
allocator: std.mem.Allocator,
|
||||||
|
dupe_behavior: DuplicateKeyBehavior = .fail,
|
||||||
|
default_object: DefaultObject = .fail,
|
||||||
|
diagnostics: Diagnostics = .{
|
||||||
|
.row = 0,
|
||||||
|
.span = .{ .absolute = 0, .line_offset = 0, .length = 0 },
|
||||||
|
.message = "all is well",
|
||||||
|
},
|
||||||
|
|
||||||
pub const State = struct {
|
pub const State = struct {
|
||||||
pub const Stack = std.ArrayList(*Value);
|
pub const Stack = std.ArrayList(*Value);
|
||||||
|
|
||||||
document: Document,
|
document: Document,
|
||||||
value_stack: Stack,
|
value_stack: Stack,
|
||||||
state: ParseState = .initial,
|
state: enum { initial, value, done } = .initial,
|
||||||
expect_shift: ShiftDirection = .none,
|
expect_shift: tokenizer.ShiftDirection = .none,
|
||||||
dangling_key: ?[]const u8 = null,
|
dangling_key: ?[]const u8 = null,
|
||||||
|
|
||||||
pub fn init(alloc: std.mem.Allocator) State {
|
pub fn init(alloc: std.mem.Allocator) State {
|
||||||
@ -637,13 +95,13 @@ pub const Parser = struct {
|
|||||||
const arena_alloc = document.arena.allocator();
|
const arena_alloc = document.arena.allocator();
|
||||||
|
|
||||||
var state: ParseState = .initial;
|
var state: ParseState = .initial;
|
||||||
var expect_shift: ShiftDirection = .none;
|
var expect_shift: tokenizer.ShiftDirection = .none;
|
||||||
var dangling_key: ?[]const u8 = null;
|
var dangling_key: ?[]const u8 = null;
|
||||||
var stack = std.ArrayList(*Value).init(arena_alloc);
|
var stack = std.ArrayList(*Value).init(arena_alloc);
|
||||||
defer stack.deinit();
|
defer stack.deinit();
|
||||||
|
|
||||||
var tok: LineTokenizer(FixedLineBuffer) = .{
|
var tok: tokenizer.LineTokenizer(buffers.FixedLineBuffer) = .{
|
||||||
.buffer = FixedLineBuffer.init(buffer),
|
.buffer = buffers.FixedLineBuffer.init(buffer),
|
||||||
.diagnostics = &self.diagnostics,
|
.diagnostics = &self.diagnostics,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -656,7 +114,7 @@ pub const Parser = struct {
|
|||||||
flipflop: while (flip) : (flop = true) {
|
flipflop: while (flip) : (flop = true) {
|
||||||
switch (state) {
|
switch (state) {
|
||||||
.initial => {
|
.initial => {
|
||||||
if (line.indent == .indent) return error.UnexpectedIndent;
|
if (line.shift == .indent) return error.UnexpectedIndent;
|
||||||
|
|
||||||
switch (line.contents) {
|
switch (line.contents) {
|
||||||
// we filter out comments above
|
// we filter out comments above
|
||||||
@ -737,14 +195,14 @@ pub const Parser = struct {
|
|||||||
// switch is embedded.
|
// switch is embedded.
|
||||||
.scalar, .flow_list, .flow_map => unreachable,
|
.scalar, .flow_list, .flow_map => unreachable,
|
||||||
.string => |*string| {
|
.string => |*string| {
|
||||||
if (line.indent == .indent)
|
if (line.shift == .indent)
|
||||||
return error.UnexpectedIndent;
|
return error.UnexpectedIndent;
|
||||||
|
|
||||||
if (!flop and line.indent == .dedent) {
|
if (!flop and line.shift == .dedent) {
|
||||||
// kick off the last trailing space or newline
|
// kick off the last trailing space or newline
|
||||||
_ = string.pop();
|
_ = string.pop();
|
||||||
|
|
||||||
var dedent_depth = line.indent.dedent;
|
var dedent_depth = line.shift.dedent;
|
||||||
while (dedent_depth > 0) : (dedent_depth -= 1)
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
||||||
_ = stack.pop();
|
_ = stack.pop();
|
||||||
|
|
||||||
@ -772,7 +230,7 @@ pub const Parser = struct {
|
|||||||
//
|
//
|
||||||
// the first line here creates the expect_shift, but the second line
|
// the first line here creates the expect_shift, but the second line
|
||||||
// is a valid continuation of the list despite not being indented
|
// is a valid continuation of the list despite not being indented
|
||||||
if (!flop and (expect_shift == .indent and line.indent != .indent))
|
if (!flop and (expect_shift == .indent and line.shift != .indent))
|
||||||
try list.append(Value.newScalar(arena_alloc));
|
try list.append(Value.newScalar(arena_alloc));
|
||||||
|
|
||||||
// Consider:
|
// Consider:
|
||||||
@ -782,11 +240,11 @@ pub const Parser = struct {
|
|||||||
// - inline scalar
|
// - inline scalar
|
||||||
//
|
//
|
||||||
// the own-line scalar will not push the stack but the next list item will be a dedent
|
// the own-line scalar will not push the stack but the next list item will be a dedent
|
||||||
if (!flop and line.indent == .dedent) {
|
if (!flop and line.shift == .dedent) {
|
||||||
// if line.indent.dedent is 1 and we're expecting it, the stack will not be popped,
|
// if line.shift.dedent is 1 and we're expecting it, the stack will not be popped,
|
||||||
// but we will continue loop flipflop. However, flop will be set to false on the next
|
// but we will continue loop flipflop. However, flop will be set to false on the next
|
||||||
// trip, so this if prong will not be run again.
|
// trip, so this if prong will not be run again.
|
||||||
var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);
|
var dedent_depth = line.shift.dedent - @intFromBool(expect_shift == .dedent);
|
||||||
|
|
||||||
while (dedent_depth > 0) : (dedent_depth -= 1)
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
||||||
_ = stack.pop();
|
_ = stack.pop();
|
||||||
@ -799,7 +257,7 @@ pub const Parser = struct {
|
|||||||
.in_line => |in_line| {
|
.in_line => |in_line| {
|
||||||
// assert that this line has been indented. this is required for an inline value when
|
// assert that this line has been indented. this is required for an inline value when
|
||||||
// the stack is in list mode.
|
// the stack is in list mode.
|
||||||
if (expect_shift != .indent or line.indent != .indent)
|
if (expect_shift != .indent or line.shift != .indent)
|
||||||
return error.UnexpectedValue;
|
return error.UnexpectedValue;
|
||||||
|
|
||||||
expect_shift = .dedent;
|
expect_shift = .dedent;
|
||||||
@ -819,7 +277,7 @@ pub const Parser = struct {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
.list_item => |value| {
|
.list_item => |value| {
|
||||||
if (flop or (line.indent == .none or line.indent == .dedent)) {
|
if (flop or (line.shift == .none or line.shift == .dedent)) {
|
||||||
expect_shift = .none;
|
expect_shift = .none;
|
||||||
switch (value) {
|
switch (value) {
|
||||||
.empty => expect_shift = .indent,
|
.empty => expect_shift = .indent,
|
||||||
@ -828,7 +286,7 @@ pub const Parser = struct {
|
|||||||
.flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
|
.flow_list => |str| try list.append(try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior)),
|
||||||
.flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
|
.flow_map => |str| try list.append(try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior)),
|
||||||
}
|
}
|
||||||
} else if (line.indent == .indent) {
|
} else if (line.shift == .indent) {
|
||||||
if (expect_shift != .indent) return error.UnexpectedIndent;
|
if (expect_shift != .indent) return error.UnexpectedIndent;
|
||||||
|
|
||||||
const new_list = try appendListGetValue(list, Value.newList(arena_alloc));
|
const new_list = try appendListGetValue(list, Value.newList(arena_alloc));
|
||||||
@ -847,7 +305,7 @@ pub const Parser = struct {
|
|||||||
//
|
//
|
||||||
// dedenting back to the list stack level requires list_item
|
// dedenting back to the list stack level requires list_item
|
||||||
|
|
||||||
if (line.indent != .indent)
|
if (line.shift != .indent)
|
||||||
return error.UnexpectedValue;
|
return error.UnexpectedValue;
|
||||||
|
|
||||||
const new_map = try appendListGetValue(list, Value.newMap(arena_alloc));
|
const new_map = try appendListGetValue(list, Value.newMap(arena_alloc));
|
||||||
@ -865,7 +323,7 @@ pub const Parser = struct {
|
|||||||
//
|
//
|
||||||
// the first line here creates the expect_shift, but the second line
|
// the first line here creates the expect_shift, but the second line
|
||||||
// is a valid continuation of the map despite not being indented
|
// is a valid continuation of the map despite not being indented
|
||||||
if (!flop and (expect_shift == .indent and line.indent != .indent)) {
|
if (!flop and (expect_shift == .indent and line.shift != .indent)) {
|
||||||
try putMap(
|
try putMap(
|
||||||
map,
|
map,
|
||||||
dangling_key orelse return error.Fail,
|
dangling_key orelse return error.Fail,
|
||||||
@ -875,8 +333,8 @@ pub const Parser = struct {
|
|||||||
dangling_key = null;
|
dangling_key = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!flop and line.indent == .dedent) {
|
if (!flop and line.shift == .dedent) {
|
||||||
var dedent_depth = line.indent.dedent - @intFromBool(expect_shift == .dedent);
|
var dedent_depth = line.shift.dedent - @intFromBool(expect_shift == .dedent);
|
||||||
|
|
||||||
while (dedent_depth > 0) : (dedent_depth -= 1)
|
while (dedent_depth > 0) : (dedent_depth -= 1)
|
||||||
_ = stack.pop();
|
_ = stack.pop();
|
||||||
@ -889,7 +347,7 @@ pub const Parser = struct {
|
|||||||
.in_line => |in_line| {
|
.in_line => |in_line| {
|
||||||
// assert that this line has been indented. this is required for an inline value when
|
// assert that this line has been indented. this is required for an inline value when
|
||||||
// the stack is in map mode.
|
// the stack is in map mode.
|
||||||
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
|
if (expect_shift != .indent or line.shift != .indent or dangling_key == null)
|
||||||
return error.UnexpectedValue;
|
return error.UnexpectedValue;
|
||||||
|
|
||||||
expect_shift = .dedent;
|
expect_shift = .dedent;
|
||||||
@ -921,7 +379,7 @@ pub const Parser = struct {
|
|||||||
//
|
//
|
||||||
// dedenting back to the map stack level requires map_item
|
// dedenting back to the map stack level requires map_item
|
||||||
|
|
||||||
if (expect_shift != .indent or line.indent != .indent or dangling_key == null)
|
if (expect_shift != .indent or line.shift != .indent or dangling_key == null)
|
||||||
return error.UnexpectedValue;
|
return error.UnexpectedValue;
|
||||||
|
|
||||||
const new_list = try putMapGetValue(map, dangling_key.?, Value.newList(arena_alloc), self.dupe_behavior);
|
const new_list = try putMapGetValue(map, dangling_key.?, Value.newList(arena_alloc), self.dupe_behavior);
|
||||||
@ -931,7 +389,7 @@ pub const Parser = struct {
|
|||||||
continue :flipflop;
|
continue :flipflop;
|
||||||
},
|
},
|
||||||
.map_item => |pair| {
|
.map_item => |pair| {
|
||||||
if (flop or (line.indent == .none or line.indent == .dedent)) {
|
if (flop or (line.shift == .none or line.shift == .dedent)) {
|
||||||
expect_shift = .none;
|
expect_shift = .none;
|
||||||
const dupekey = try arena_alloc.dupe(u8, pair.key);
|
const dupekey = try arena_alloc.dupe(u8, pair.key);
|
||||||
switch (pair.val) {
|
switch (pair.val) {
|
||||||
@ -944,7 +402,7 @@ pub const Parser = struct {
|
|||||||
.flow_list => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
|
.flow_list => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_list, self.dupe_behavior), self.dupe_behavior),
|
||||||
.flow_map => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior),
|
.flow_map => |str| try putMap(map, dupekey, try parseFlow(arena_alloc, str, .flow_map, self.dupe_behavior), self.dupe_behavior),
|
||||||
}
|
}
|
||||||
} else if (line.indent == .indent) {
|
} else if (line.shift == .indent) {
|
||||||
if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;
|
if (expect_shift != .indent or dangling_key == null) return error.UnexpectedValue;
|
||||||
|
|
||||||
const new_map = try putMapGetValue(map, dangling_key.?, Value.newMap(arena_alloc), self.dupe_behavior);
|
const new_map = try putMapGetValue(map, dangling_key.?, Value.newMap(arena_alloc), self.dupe_behavior);
|
||||||
@ -967,6 +425,7 @@ pub const Parser = struct {
|
|||||||
|
|
||||||
switch (state) {
|
switch (state) {
|
||||||
.initial => switch (self.default_object) {
|
.initial => switch (self.default_object) {
|
||||||
|
.scalar => document.root = .{ .scalar = std.ArrayList(u8).init(arena_alloc) },
|
||||||
.string => document.root = .{ .string = std.ArrayList(u8).init(arena_alloc) },
|
.string => document.root = .{ .string = std.ArrayList(u8).init(arena_alloc) },
|
||||||
.list => document.root = Value.newList(arena_alloc),
|
.list => document.root = Value.newList(arena_alloc),
|
||||||
.map => document.root = Value.newMap(arena_alloc),
|
.map => document.root = Value.newMap(arena_alloc),
|
||||||
@ -1256,47 +715,4 @@ pub const Parser = struct {
|
|||||||
|
|
||||||
return gop.value_ptr;
|
return gop.value_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dumpBufLines(self: *Parser, buf: []const u8) Error!void {
|
|
||||||
var tok: LineTokenizer = .{ .buffer = buf, .diagnostics = &self.diagnostics };
|
|
||||||
while (try tok.next()) |line| {
|
|
||||||
dumpLine(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dumpLine(line: LineTokenizer.Line) void {
|
|
||||||
var dedbuf: [64]u8 = .{0} ** 64;
|
|
||||||
var keybuf: [2048]u8 = .{0} ** 2048;
|
|
||||||
var valbuf: [2048]u8 = .{0} ** 2048;
|
|
||||||
|
|
||||||
const shiftstr = if (line.indent == .dedent)
|
|
||||||
std.fmt.bufPrint(&dedbuf, " ({d})", .{line.indent.dedent}) catch unreachable
|
|
||||||
else
|
|
||||||
"";
|
|
||||||
|
|
||||||
std.debug.print("{s}{s}: {s} => {s}\n", .{
|
|
||||||
@tagName(line.indent), shiftstr, @tagName(line.contents), switch (line.contents) {
|
|
||||||
.comment => |str| str,
|
|
||||||
.in_line, .list_item => |scalar| switch (scalar) {
|
|
||||||
.empty => "[empty]",
|
|
||||||
.scalar,
|
|
||||||
.string,
|
|
||||||
.flow_list,
|
|
||||||
.flow_map,
|
|
||||||
=> |str| std.fmt.bufPrint(&keybuf, "{s} => {s}", .{ @tagName(scalar), str }) catch unreachable,
|
|
||||||
},
|
|
||||||
.map_item => |map| std.fmt.bufPrint(&keybuf, "{s} : {s}", .{
|
|
||||||
map.key,
|
|
||||||
switch (map.val) {
|
|
||||||
.empty => "[empty]",
|
|
||||||
.scalar,
|
|
||||||
.string,
|
|
||||||
.flow_list,
|
|
||||||
.flow_map,
|
|
||||||
=> |str| std.fmt.bufPrint(&valbuf, "{s} => {s}", .{ @tagName(map.val), str }) catch unreachable,
|
|
||||||
},
|
|
||||||
}) catch unreachable,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
};
|
};
|
151
src/parser/value.zig
Normal file
151
src/parser/value.zig
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// A node of a parsed document tree.
///
/// Text nodes (`scalar`, `string`) own a byte buffer, sequence nodes (`list`,
/// `flow_list`) own an array of child values, and mapping nodes (`map`,
/// `flow_map`) own an insertion-ordered string-keyed table of child values.
/// Every container is created with the allocator handed to the constructor
/// that built it; this type has no `deinit`, so releasing the memory is up to
/// whoever owns that allocator.
pub const Value = union(enum) {
    /// Growable byte buffer backing `scalar` and `string`.
    pub const String = std.ArrayList(u8);
    /// Insertion-ordered table backing `map` and `flow_map`. Keys are not
    /// copied by the table itself.
    pub const Map = std.StringArrayHashMap(Value);
    /// Growable array backing `list` and `flow_list`.
    pub const List = std.ArrayList(Value);
    /// The tag enum of this union. Obtained through `std.meta.Tag` so that it
    /// does not depend on the field spelling of `@typeInfo`'s result.
    pub const TagType = std.meta.Tag(Value);

    scalar: String,
    string: String,
    list: List,
    flow_list: List,
    map: Map,
    flow_map: Map,

    /// Creates a `scalar` value holding a copy of `input`, allocated from
    /// `alloc`. Fails only if the allocation fails.
    pub inline fn fromScalar(alloc: std.mem.Allocator, input: []const u8) std.mem.Allocator.Error!Value {
        return try fromScalarOrString(alloc, .scalar, input);
    }

    /// Creates a `string` value holding a copy of `input`, allocated from
    /// `alloc`. Fails only if the allocation fails.
    pub inline fn fromString(alloc: std.mem.Allocator, input: []const u8) std.mem.Allocator.Error!Value {
        return try fromScalarOrString(alloc, .string, input);
    }

    /// Shared implementation of `fromScalar` and `fromString`.
    /// `classification` selects which of the two text fields is initialized.
    inline fn fromScalarOrString(
        alloc: std.mem.Allocator,
        comptime classification: TagType,
        input: []const u8,
    ) std.mem.Allocator.Error!Value {
        // Reserve exactly enough room up front so the copy below cannot fail.
        var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
        @field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
        return res;
    }

    /// Creates an empty `scalar` value that will allocate from `alloc`.
    pub inline fn newScalar(alloc: std.mem.Allocator) Value {
        return .{ .scalar = String.init(alloc) };
    }

    /// Creates an empty `string` value that will allocate from `alloc`.
    pub inline fn newString(alloc: std.mem.Allocator) Value {
        return .{ .string = String.init(alloc) };
    }

    /// Creates an empty `list` value that will allocate from `alloc`.
    pub inline fn newList(alloc: std.mem.Allocator) Value {
        return .{ .list = List.init(alloc) };
    }

    /// Creates an empty `flow_list` value that will allocate from `alloc`.
    pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
        return .{ .flow_list = List.init(alloc) };
    }

    /// Creates an empty `map` value that will allocate from `alloc`.
    pub inline fn newMap(alloc: std.mem.Allocator) Value {
        return .{ .map = Map.init(alloc) };
    }

    /// Creates an empty `flow_map` value that will allocate from `alloc`.
    pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
        return .{ .flow_map = Map.init(alloc) };
    }

    /// Deep structural comparison.
    ///
    /// Two values are equal only if they carry the same tag (so a `scalar` is
    /// never equal to a `string`, nor a `list` to a `flow_list`), and their
    /// contents are recursively equal. Maps must additionally hold their
    /// entries in the same order.
    pub fn recursiveEqualsExact(self: Value, other: Value) bool {
        if (@as(TagType, self) != other) return false;
        switch (self) {
            inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items),
            inline .list, .flow_list => |lst, tag| {
                const olst = @field(other, @tagName(tag));

                if (lst.items.len != olst.items.len) return false;
                for (lst.items, olst.items) |this, that| if (!this.recursiveEqualsExact(that)) return false;
                return true;
            },
            inline .map, .flow_map => |map, tag| {
                const omap = @field(other, @tagName(tag));

                if (map.count() != omap.count()) return false;
                var iter = map.iterator();
                var oiter = omap.iterator();
                // this loop structure enforces that the maps are in the same order
                while (iter.next()) |this| {
                    const that = oiter.next() orelse return false;
                    if (!std.mem.eql(u8, this.key_ptr.*, that.key_ptr.*) or !this.value_ptr.recursiveEqualsExact(that.value_ptr.*)) return false;
                }
                // the maps are equal if we have also consumed all of the values from
                // other.
                return oiter.next() == null;
            },
        }
    }

    /// Writes a human-readable dump of this value, followed by a newline,
    /// using `std.debug.print`.
    pub fn printDebug(self: Value) void {
        self.printRecursive(0);
        std.debug.print("\n", .{});
    }

    /// Recursive worker for `printDebug`. `indent` is the number of spaces
    /// used as left padding for the lines this call emits.
    fn printRecursive(self: Value, indent: usize) void {
        switch (self) {
            .scalar, .string => |str| {
                if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
                    // Multi-line text: start on a fresh line and pad every
                    // line; the final line gets no trailing newline.
                    var lines = std.mem.splitScalar(u8, str.items, '\n');
                    std.debug.print("\n", .{});
                    while (lines.next()) |line| {
                        std.debug.print(
                            "{[empty]s: >[indent]}{[line]s}{[nl]s}",
                            .{
                                .empty = "",
                                .indent = indent,
                                .line = line,
                                .nl = if (lines.peek() == null) "" else "\n",
                            },
                        );
                    }
                } else {
                    std.debug.print("{s}", .{str.items});
                }
            },
            .list, .flow_list => |list| {
                if (list.items.len == 0) {
                    std.debug.print("[]", .{});
                    return;
                }

                std.debug.print("[\n", .{});
                for (list.items, 0..) |value, idx| {
                    std.debug.print("{[empty]s: >[indent]}[{[idx]d}] = ", .{ .empty = "", .indent = indent, .idx = idx });
                    value.printRecursive(indent + 2);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}]",
                    .{ .empty = "", .indent = indent },
                );
            },
            .map, .flow_map => |map| {
                if (map.count() == 0) {
                    std.debug.print("{{}}", .{});
                    return;
                }

                std.debug.print("{{\n", .{});

                var iter = map.iterator();

                while (iter.next()) |entry| {
                    std.debug.print(
                        "{[empty]s: >[indent]}{[key]s}: ",
                        .{ .empty = "", .indent = indent + 2, .key = entry.key_ptr.* },
                    );
                    entry.value_ptr.printRecursive(indent + 4);
                    std.debug.print(",\n", .{});
                }
                std.debug.print(
                    "{[empty]s: >[indent]}}}",
                    .{ .empty = "", .indent = indent },
                );
            },
        }
    }
};
|
251
src/tokenizer.zig
Normal file
251
src/tokenizer.zig
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
const Diagnostics = @import("./parser.zig").Diagnostics;
|
||||||
|
|
||||||
|
/// Everything that can go wrong while splitting a document into lines.
pub const Error = error{
    /// A character appeared where the grammar does not allow it, e.g. a
    /// carriage return in the leading whitespace, an introducer (`#`, `-`,
    /// `:`, `>`, `|`) not followed by a space, or an unterminated flow item.
    BadToken,
    /// Both tabs and spaces were used for indentation.
    MixedIndentation,
    /// A space-indented line is not a whole multiple of the detected
    /// indentation width.
    UnquantizedIndentation,
    /// A line is indented more than one level deeper than the previous one.
    TooMuchIndentation,
    MissingNewline,
    /// A line consists only of whitespace, or a value ends in a space or tab.
    TrailingWhitespace,
    /// An internal invariant was violated.
    Impossible,
};
|
||||||
|
|
||||||
|
/// The indentation style a document has been observed to use so far.
pub const DetectedIndentation = union(enum) {
    /// No indentation character has been seen yet.
    unknown,
    /// The document indents with spaces. The payload is the number of spaces
    /// per level; it is 0 from the moment the first space is seen until the
    /// width of that first indented line has been measured.
    spaces: usize,
    /// The document indents with tabs.
    tabs,
};
|
||||||
|
|
||||||
|
/// The value portion of a line. Every payload is a slice of the line it was
/// found on.
pub const InlineItem = union(enum) {
    /// Nothing follows the introducer on this line.
    empty,
    /// Plain text.
    scalar: []const u8,
    /// A string fragment whose lines are joined with a newline.
    line_string: []const u8,
    /// A string fragment whose lines are joined with a space.
    space_string: []const u8,

    /// The text of a flow list after its opening `[`.
    flow_list: []const u8,
    /// The text of a flow map after its opening `{`.
    flow_map: []const u8,

    /// Returns the byte used to join this string fragment to the next one.
    ///
    /// Only meaningful for `line_string` and `space_string`; calling it on
    /// any other variant is a programming error (`unreachable`).
    pub fn lineEnding(self: InlineItem) u8 {
        switch (self) {
            .line_string => return '\n',
            .space_string => return ' ',
            .empty, .scalar, .flow_list, .flow_map => unreachable,
        }
    }
};
|
||||||
|
|
||||||
|
/// What kind of thing a single line holds.
pub const LineContents = union(enum) {
    /// The text following the `#` of a comment line.
    comment: []const u8,

    /// A value that sits on a line by itself.
    in_line: InlineItem,
    /// The value following the `-` of a list entry.
    list_item: InlineItem,
    /// A `key:` line; `key` is the text before the first `:` and `val` is
    /// whatever follows it.
    map_item: struct {
        key: []const u8,
        val: InlineItem,
    },
};
|
||||||
|
|
||||||
|
/// How a line's indentation level relates to that of the previous line.
pub const ShiftDirection = enum {
    /// Deeper than the previous line.
    indent,
    /// Shallower than the previous line.
    dedent,
    /// Same level as the previous line.
    none,
};
|
||||||
|
|
||||||
|
/// A `ShiftDirection` together with the number of levels crossed when
/// dedenting.
pub const LineShift = union(ShiftDirection) {
    /// One level deeper than the previous line.
    indent,
    /// Number of levels dropped; we can dedent multiple levels at once.
    dedent: usize,
    /// Same level as the previous line.
    none,
};
|
||||||
|
|
||||||
|
/// One tokenized line of a document.
pub const Line = struct {
    /// Indentation change relative to the previously tokenized line.
    shift: LineShift,
    /// The classified payload of the line.
    contents: LineContents,
    /// The text of the line with its leading indentation removed.
    raw: []const u8,
};
|
||||||
|
|
||||||
|
/// Returns a tokenizer type that reads raw lines from a `Buffer` and
/// classifies each one as a `Line`.
///
/// `Buffer` is expected to be either LineBuffer or FixedLineBuffer, but can
/// technically be anything with a `nextLine` method that yields an optional
/// byte slice.
pub fn LineTokenizer(comptime Buffer: type) type {
    return struct {
        /// Source of raw lines.
        buffer: Buffer,
        /// Not referenced by any code in this type.
        index: usize = 0,
        /// Indentation style detected so far; latched by the first line that
        /// starts with a space or a tab.
        indentation: DetectedIndentation = .unknown,
        /// Indentation level (in quantized units) of the last line returned.
        last_indent: usize = 0,
        /// Stored for the caller; not written by any code in this type.
        diagnostics: *Diagnostics,
        /// Incremented once for every line that gets as far as being
        /// classified. Blank lines are skipped before that point and are
        /// therefore not counted.
        /// NOTE(review): if this is meant to be a source row number, blank
        /// lines probably need to be counted too; confirm against users.
        row: usize = 0,

        /// Returns the next non-blank line of the document, or `null` once
        /// the buffer is exhausted.
        ///
        /// Errors:
        /// - `MixedIndentation`: tabs and spaces are both used to indent.
        /// - `BadToken`: a carriage return in the leading whitespace, an
        ///   introducer not followed by a space, or a malformed flow item.
        /// - `TrailingWhitespace`: a whitespace-only line, or a value that
        ///   ends in a space or tab.
        /// - `UnquantizedIndentation`: a space indent that is not a multiple
        ///   of the detected width.
        /// - `TooMuchIndentation`: more than one level deeper than the
        ///   previous line.
        pub fn next(self: *@This()) Error!?Line {
            lineloop: while (self.buffer.nextLine()) |raw_line| {
                var indent: usize = 0;
                for (raw_line, 0..) |char, idx| {
                    switch (char) {
                        ' ' => {
                            switch (self.indentation) {
                                // There's a weird coupling here because we can't set this until
                                // all spaces have been consumed. I also thought about ignoring
                                // spaces on comment lines since those don't affect the
                                // relative indent/dedent, but then we would allow comments
                                // to ignore our indent quantum, which I dislike due to it making
                                // ugly documents.
                                .unknown => self.indentation = .{ .spaces = 0 },
                                .spaces => {},
                                .tabs => return error.MixedIndentation,
                            }
                        },
                        '\t' => {
                            switch (self.indentation) {
                                .unknown => self.indentation = .tabs,
                                .spaces => return error.MixedIndentation,
                                .tabs => {},
                            }
                        },
                        '\r' => {
                            return error.BadToken;
                        },
                        else => {
                            indent = idx;
                            break;
                        },
                    }
                } else {
                    // The loop ran off the end: the line is empty or made up
                    // entirely of whitespace.
                    if (raw_line.len > 0) return error.TrailingWhitespace;
                    continue :lineloop;
                }

                var quantized: usize = if (self.indentation == .spaces) quant: {
                    // The first space-indented line defines the width of one level.
                    if (self.indentation.spaces == 0) {
                        self.indentation.spaces = indent;
                    }
                    // The width can still be 0 here if an earlier call bailed
                    // out with an error after seeing a leading space and this
                    // line is not indented. Dividing by it would be a division
                    // by zero, so report level 0 and leave the width unset.
                    if (self.indentation.spaces == 0) break :quant indent;
                    if (@rem(indent, self.indentation.spaces) != 0)
                        return error.UnquantizedIndentation;

                    break :quant @divExact(indent, self.indentation.spaces);
                } else indent;

                const shift: LineShift = if (quantized > self.last_indent) rel: {
                    if ((quantized - self.last_indent) > 1)
                        return error.TooMuchIndentation;
                    break :rel .indent;
                } else if (quantized < self.last_indent)
                    .{ .dedent = self.last_indent - quantized }
                else
                    .none;

                // Runs when this iteration exits (every path below returns),
                // so it records whatever `quantized` holds at that point.
                defer {
                    self.row += 1;
                    self.last_indent = quantized;
                }

                const line = raw_line[indent..];

                // this should not be possible, as empty lines are caught earlier.
                if (line.len == 0) return error.Impossible;

                switch (line[0]) {
                    '#' => {
                        // force comments to be followed by a space. This makes them
                        // behave the same way as strings, actually.
                        if (line.len > 1 and line[1] != ' ') return error.BadToken;

                        // simply lie about indentation when the line is a comment.
                        quantized = self.last_indent;
                        return .{
                            .shift = .none,
                            .contents = .{ .comment = line[1..] },
                            .raw = line,
                        };
                    },
                    '|', '>', '[', '{' => {
                        return .{
                            .shift = shift,
                            .contents = .{ .in_line = try detectInlineItem(line) },
                            .raw = line,
                        };
                    },
                    '-' => {
                        if (line.len > 1 and line[1] != ' ') return error.BadToken;

                        return if (line.len == 1) .{
                            .shift = shift,
                            .contents = .{ .list_item = .empty },
                            .raw = line,
                        } else .{
                            .shift = shift,
                            .contents = .{ .list_item = try detectInlineItem(line[2..]) },
                            .raw = line,
                        };
                    },
                    else => {
                        // The first ':' splits a map key from its value. A
                        // line without one is a bare scalar.
                        for (line, 0..) |char, idx| {
                            if (char == ':') {
                                if (idx + 1 == line.len) return .{
                                    .shift = shift,
                                    .contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
                                    .raw = line,
                                };

                                if (line[idx + 1] != ' ') return error.BadToken;

                                return .{
                                    .shift = shift,
                                    .contents = .{ .map_item = .{
                                        .key = line[0..idx],
                                        .val = try detectInlineItem(line[idx + 2 ..]),
                                    } },
                                    .raw = line,
                                };
                            }
                        }

                        return .{
                            .shift = shift,
                            .contents = .{ .in_line = .{ .scalar = line } },
                            .raw = line,
                        };
                    },
                }

                // somehow everything else has failed
                return error.Impossible;
            }
            return null;
        }

        /// Classifies the value part of a line by its first byte.
        ///
        /// - empty input gives `.empty`
        /// - `>` gives `.line_string`, `|` gives `.space_string`; the payload
        ///   starts after the introducer and its following space, and a
        ///   trailing `|` is stripped from it
        /// - `[` and `{` give `.flow_list` / `.flow_map`; the payload drops
        ///   the opening bracket and keeps the closing one
        /// - anything else gives `.scalar` with the whole input
        ///
        /// Returns `BadToken` for a string introducer not followed by a
        /// space or a flow item without its closing bracket, and
        /// `TrailingWhitespace` if the input ends in a space or tab.
        fn detectInlineItem(buf: []const u8) Error!InlineItem {
            if (buf.len == 0) return .empty;

            switch (buf[0]) {
                '>', '|' => |char| {
                    if (buf.len > 1 and buf[1] != ' ') return error.BadToken;

                    // `@min` and `@intFromBool` keep the bounds valid when
                    // buf is just the one-byte introducer.
                    const slice: []const u8 = switch (buf[buf.len - 1]) {
                        ' ', '\t' => return error.TrailingWhitespace,
                        '|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
                        else => buf[@min(2, buf.len)..buf.len],
                    };

                    return if (char == '>')
                        .{ .line_string = slice }
                    else
                        .{ .space_string = slice };
                },
                '[' => {
                    if (buf.len < 2 or buf[buf.len - 1] != ']')
                        return error.BadToken;

                    // keep the closing ] for the flow parser
                    return .{ .flow_list = buf[1..] };
                },
                '{' => {
                    if (buf.len < 2 or buf[buf.len - 1] != '}')
                        return error.BadToken;

                    // keep the closing } for the flow parser
                    return .{ .flow_map = buf[1..] };
                },
                else => {
                    if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
                        return error.TrailingWhitespace;

                    return .{ .scalar = buf };
                },
            }
        }
    };
}
|
Loading…
x
Reference in New Issue
Block a user