Compare commits
No commits in common. "95a15adad72e628a14ed039e85a3a54e0482174f" and "6c1eb176be0852eaac540218f66664e0476021b2" have entirely different histories.
95a15adad7...6c1eb176be

src/config.zig (268 changed lines)
@@ -63,123 +63,38 @@
 const std = @import("std");
 
-pub const IndexSlice = struct { start: usize, len: usize };
 
 pub const Diagnostics = struct {
 row: usize,
 span: struct { absolute: usize, line_offset: usize, length: usize },
 message: []const u8,
 };
 
-pub const LineBuffer = struct {
-allocator: std.mem.Allocator,
-buffer: []u8,
-used: usize,
-window: IndexSlice,
+pub const LineTokenizer = struct {
 
-pub const default_capacity: usize = 4096;
-pub const Error = std.mem.Allocator.Error;
 
-pub fn init(allocator: std.mem.Allocator) Error!LineBuffer {
-return initCapacity(allocator, default_capacity);
-}
 
-pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) Error!LineBuffer {
-return .{
-.allocator = allocator,
-.buffer = try allocator.alloc(u8, capacity),
-.used = 0,
-.window = .{ .start = 0, .len = 0 },
-};
-}
 
-pub fn feed(self: *LineBuffer, data: []const u8) Error!void {
-if (data.len == 0) return;
-// TODO: check for usize overflow here if we want Maximum Robustness
-const new_window_len = self.window.len + data.len;
 
-// data cannot fit in the buffer with our scan window, so we have to realloc
-if (new_window_len > self.buffer.len) {
-// TODO: adopt an overallocation strategy? Will potentially avoid allocating
-// on every invocation but will cause the buffer to oversize
-try self.allocator.realloc(self.buffer, new_window_len);
-self.rehome();
-@memcpy(self.buffer[self.used..].ptr, data);
-self.used = new_window_len;
-self.window.len = new_window_len;
-}
-// data will fit, but needs to be moved in the buffer
-else if (self.window.start + new_window_len > self.buffer.len) {
-self.rehome();
-@memcpy(self.buffer[self.used..].ptr, data);
-self.used = new_window_len;
-self.window.len = new_window_len;
-}
-// data can simply be appended
-else {
-@memcpy(self.buffer[self.used..].ptr, data);
-}
-}
 
-/// The memory returned by this function is valid until the next call to `feed`.
-/// The resulting slice does not include the newline character.
-pub fn nextLine(self: *LineBuffer) ?[]const u8 {
-if (self.window.start >= self.buffer.len or self.window.len == 0)
-return null;
 
-const window = self.buffer[self.window.start..][0..self.window.len];
-const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;
 
-self.window.start += split + 1;
-self.window.len -= split + 1;
 
-return window[0..split];
-}
 
-fn rehome(self: *LineBuffer) void {
-if (self.window.start == 0) return;
 
-const window = self.buffer[self.window.start..][0..self.window.len];
 
-if (self.window.len > self.window.start)
-std.mem.copyForwards(u8, self.buffer, window)
-else
-@memcpy(self.buffer.ptr, window);
 
-self.window.start = 0;
-self.used = window.len;
-}
-};
 
-pub const FixedLineBuffer = struct {
 buffer: []const u8,
-window: IndexSlice,
+index: usize = 0,
+indentation: IndentationType = .immaterial,
+last_indent: usize = 0,
+diagnostics: *Diagnostics,
 
-pub fn init(data: []const u8) FixedLineBuffer {
-return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } };
-}
+row: usize = 0,
 
-pub fn nextLine(self: *FixedLineBuffer) ?[]const u8 {
-if (self.window.start >= self.buffer.len or self.window.len == 0)
-return null;
+const Error = error{
+BadToken,
+MixedIndentation,
+UnquantizedIndentation,
+TooMuchIndentation,
+MissingNewline,
+TrailingWhitespace,
+Impossible,
+};
 
-const window = self.buffer[self.window.start..][0..self.window.len];
-const split = std.mem.indexOfScalar(u8, window, '\n') orelse return null;
+const IndentationType = union(enum) {
 
-self.window.start += split + 1;
-self.window.len -= split + 1;
 
-return window[0..split];
-}
-};
 
-const IndentationType = union(enum) {
 immaterial: void,
 spaces: usize,
 tabs: void,
 };
 
 const InlineItem = union(enum) {
 empty: void,
 scalar: []const u8,
 line_string: []const u8,
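For orientation, the left-hand side of the hunk above defines a streaming line buffer. The following is a minimal sketch of how that API is driven, inferred only from the signatures and doc comments shown here; allocator stands for any std.mem.Allocator, and the snippet is illustrative rather than code from either commit:

    var lb = try LineBuffer.init(allocator);
    try lb.feed("key: value\nsecond");
    while (lb.nextLine()) |line| {
        // first iteration yields "key: value" without the newline; "second"
        // has no trailing newline yet, so it stays in the scan window.
        std.debug.print("{s}\n", .{line});
    }
    try lb.feed(" line\n");
    // a later nextLine() call can now return "second line"; per the doc
    // comment above, slices returned earlier are invalidated by this feed().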
@@ -195,67 +110,51 @@ const InlineItem = union(enum) {
 else => unreachable,
 };
 }
 };
 
 const LineContents = union(enum) {
 comment: []const u8,
 
 in_line: InlineItem,
 list_item: InlineItem,
 map_item: struct { key: []const u8, val: InlineItem },
 };
 
 // we can dedent multiple levels at once. Example:
 //
 // foo:
 //   bar:
 //     > a
 //     > string
 // baz: [qux]
 //
 // capturing this is conceptually simple, but implementing it without complex
 // indentation tracking requires quantizing the indentation. This means our
 // IndentationType will also need to track the number of spaces used for
 // indentation, as detected. Then every line we have to check indent rem the
 // quantization level == 0 (otherwise we broke quantization) and compute indent
 // div the quantization level to give us our effective indentation level.
 
 const ShiftDirection = enum { indent, dedent, none };
 const RelativeIndent = union(ShiftDirection) {
 indent: void,
 dedent: usize,
 none: void,
 };
 
 const Line = struct {
 indent: RelativeIndent,
 contents: LineContents,
 raw: []const u8,
-};
 
-pub fn LineTokenizer(comptime Buffer: type) type {
-return struct {
-buffer: Buffer,
-index: usize = 0,
-indentation: IndentationType = .immaterial,
-last_indent: usize = 0,
-diagnostics: *Diagnostics,
-row: usize = 0,
 
-const Error = error{
-BadToken,
-MixedIndentation,
-UnquantizedIndentation,
-TooMuchIndentation,
-MissingNewline,
-TrailingWhitespace,
-Impossible,
 };
 
-pub fn next(self: *@This()) Error!?Line {
-lineloop: while (self.buffer.nextLine()) |raw_line| {
+pub fn next(self: *LineTokenizer) Error!?Line {
+if (self.index == self.buffer.len) return null;
 
 var indent: usize = 0;
-for (raw_line, 0..) |char, idx| {
+var offset: usize = 0;
 
+for (self.buffer[self.index..], 0..) |char, idx| {
 switch (char) {
 ' ' => {
 switch (self.indentation) {
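The comment block in the hunk above describes the indentation-quantization rule in prose. As a standalone sketch of that arithmetic, assuming space indentation and a nonzero detected width (this helper is illustrative and exists in neither commit):

    fn quantizeIndent(indent: usize, spaces_per_level: usize) error{UnquantizedIndentation}!usize {
        // spaces_per_level is the indentation width detected on the first
        // indented line, e.g. 2 or 4; it must be nonzero here.
        if (@rem(indent, spaces_per_level) != 0) return error.UnquantizedIndentation;
        return @divExact(indent, spaces_per_level);
    }

    // quantizeIndent(4, 2) == 2 and quantizeIndent(6, 2) == 3, while
    // quantizeIndent(3, 2) fails with error.UnquantizedIndentation.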
@@ -269,6 +168,7 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 .spaces => {},
 .tabs => return error.MixedIndentation,
 }
+indent += 1;
 },
 '\t' => {
 switch (self.indentation) {
@@ -276,28 +176,40 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 .spaces => return error.MixedIndentation,
 .tabs => {},
 }
+indent += 1;
 },
 '\r' => {
 return error.BadToken;
 },
-else => {
-indent = idx;
-break;
+'\n' => {
+// don't even emit anything for empty rows.
+self.row += 1;
+offset = idx + 1;
+// if it's too hard to deal with, Just Make It An Error!!!
+// an empty line with whitespace on it is garbage. It can mess with
+// the indentation detection grossly in a way that is annoying to
+// deal with. Besides, having whitespace-only lines in a document
+// is essentially terrorism, with which negotiations are famously
+// not permitted.
+if (indent > 0) return error.TrailingWhitespace;
 },
+else => break,
 }
 } else {
-if (raw_line.len > 0) return error.TrailingWhitespace;
-continue :lineloop;
+std.debug.assert(self.buffer.len == self.index + indent + offset + 1);
+self.index = self.buffer.len;
+// this prong will get hit when the document only consists of whitespace
+return null;
 }
 
-var quantized: usize = if (self.indentation == .spaces) quant: {
+var quantized: usize = if (self.indentation == .spaces) blk: {
 if (self.indentation.spaces == 0) {
 self.indentation.spaces = indent;
 }
 if (@rem(indent, self.indentation.spaces) != 0)
 return error.UnquantizedIndentation;
 
-break :quant @divExact(indent, self.indentation.spaces);
+break :blk @divExact(indent, self.indentation.spaces);
 } else indent;
 
 const relative: RelativeIndent = if (quantized > self.last_indent) rel: {
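To make the whitespace-only-line rule on the right-hand side concrete, a test along these lines should trip the TrailingWhitespace error. This is a sketch written against the declarations in this file, not a test from either commit, and it assumes Diagnostics does not need to be initialized before the error path is taken:

    test "whitespace-only line is rejected" {
        var diag: Diagnostics = undefined;
        var tok: LineTokenizer = .{ .buffer = "  \n", .diagnostics = &diag };
        // two spaces of indentation followed immediately by the newline
        try std.testing.expectError(error.TrailingWhitespace, tok.next());
    }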
@@ -309,12 +221,16 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 else
 .none;
 
+offset += indent;
+
 defer {
 self.row += 1;
 self.last_indent = quantized;
+self.index += offset;
 }
 
-const line = raw_line[indent..];
+const line = try consumeLine(self.buffer[self.index + offset ..]);
+offset += line.len + 1;
 
 // this should not be possible, as empty lines are caught earlier.
 if (line.len == 0) return error.Impossible;
@@ -378,11 +294,6 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 };
 },
 }
-
-// somehow everything else has failed
-return error.Impossible;
-}
-return null;
 }
 
 fn detectInlineItem(buf: []const u8) Error!InlineItem {
@@ -425,12 +336,23 @@ pub fn LineTokenizer(comptime Buffer: type) type {
 },
 }
 }
-};
-}
+fn consumeLine(buf: []const u8) ![]const u8 {
+for (buf, 0..) |char, idx| {
+switch (char) {
+'\n' => return buf[0..idx],
+'\r' => return error.BadToken,
+else => {},
+}
+}
 
+return error.MissingNewline;
+}
+};
 
 pub const Value = union(enum) {
 pub const String = std.ArrayList(u8);
-pub const Map = std.StringArrayHashMap(Value);
+pub const Map = std.StringHashMap(Value);
 pub const List = std.ArrayList(Value);
 pub const TagType = @typeInfo(Value).Union.tag_type.?;
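One behavioral difference in the hunk above is easy to miss: std.StringArrayHashMap iterates in insertion order while std.StringHashMap does not, so only the left-hand Value.Map preserves the key order of the source document. A small illustration, independent of this file (allocator is assumed to be any std.mem.Allocator):

    var ordered = std.StringArrayHashMap(u32).init(allocator);
    defer ordered.deinit();
    try ordered.put("b", 1);
    try ordered.put("a", 2);
    // ordered.keys() is {"b", "a"}: insertion order is preserved.

    var unordered = std.StringHashMap(u32).init(allocator);
    defer unordered.deinit();
    try unordered.put("b", 1);
    try unordered.put("a", 2);
    // the iteration order of unordered.iterator() is unspecified.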
@@ -567,7 +489,7 @@ pub const Parser = struct {
 DuplicateKey,
 BadMapEntry,
 Fail,
-} || LineTokenizer(FixedLineBuffer).Error || FlowParser.Error || std.mem.Allocator.Error;
+} || LineTokenizer.Error || FlowParser.Error || std.mem.Allocator.Error;
 
 pub const DuplicateKeyBehavior = enum {
 use_first,
@@ -614,7 +536,7 @@ pub const Parser = struct {
 document: Document,
 value_stack: Stack,
 state: ParseState = .initial,
-expect_shift: ShiftDirection = .none,
+expect_shift: LineTokenizer.ShiftDirection = .none,
 dangling_key: ?[]const u8 = null,
 
 pub fn init(alloc: std.mem.Allocator) State {
@@ -635,16 +557,12 @@ pub const Parser = struct {
 const arena_alloc = document.arena.allocator();
 
 var state: ParseState = .initial;
-var expect_shift: ShiftDirection = .none;
+var expect_shift: LineTokenizer.ShiftDirection = .none;
 var dangling_key: ?[]const u8 = null;
 var stack = std.ArrayList(*Value).init(arena_alloc);
 defer stack.deinit();
 
-var tok: LineTokenizer(FixedLineBuffer) = .{
-.buffer = FixedLineBuffer.init(buffer),
-.diagnostics = &self.diagnostics,
-};
+var tok: LineTokenizer = .{ .buffer = buffer, .diagnostics = &self.diagnostics };
 
 while (try tok.next()) |line| {
 if (line.contents == .comment) continue;
 
@@ -727,7 +645,7 @@ pub const Parser = struct {
 // key somewhere until we can consume the
 // value. More parser state to lug along.
 
-dangling_key = try arena_alloc.dupe(u8, pair.key);
+dangling_key = pair.key;
 state = .value;
 },
 .scalar => |str| {
@@ -897,7 +815,7 @@ pub const Parser = struct {
 
 switch (pair.val) {
 .empty => {
-dangling_key = try arena_alloc.dupe(u8, pair.key);
+dangling_key = pair.key;
 expect_shift = .indent;
 },
 .scalar => |str| try new_map.map.put(pair.key, try Value.fromScalar(arena_alloc, str)),
@@ -995,7 +913,7 @@ pub const Parser = struct {
 .none, .dedent => switch (pair.val) {
 .empty => {
 expect_shift = .indent;
-dangling_key = try arena_alloc.dupe(u8, pair.key);
+dangling_key = pair.key;
 },
 .scalar => |str| try putMap(map, pair.key, try Value.fromScalar(arena_alloc, str), self.dupe_behavior),
 .line_string, .space_string => |str| try putMap(map, pair.key, try Value.fromString(arena_alloc, str), self.dupe_behavior),
@@ -1013,7 +931,7 @@ pub const Parser = struct {
 switch (pair.val) {
 .empty => {
 expect_shift = .indent;
-dangling_key = try arena_alloc.dupe(u8, pair.key);
+dangling_key = pair.key;
 },
 .scalar => |str| try new_map.map.put(pair.key, try Value.fromScalar(arena_alloc, str)),
 .line_string, .space_string => |str| try new_map.map.put(pair.key, try Value.fromString(arena_alloc, str)),
@@ -1334,7 +1252,7 @@ pub const FlowParser = struct {
 .consuming_map_key => switch (char) {
 ':' => {
 const tip = try getStackTip(self.stack);
-dangling_key = try self.alloc.dupe(u8, self.buffer[tip.item_start..idx]);
+dangling_key = self.buffer[tip.item_start..idx];
 
 self.state = .want_map_value;
 },