parser: start the arduous journey of hooking up diagnostics
The errors in the line buffer and tokenizer now have diagnostics. The line number is trivial to keep track of due to the line buffer, but the column index requires quite a bit of juggling, as we pass successively trimmed-down buffers to the internals of the parser. There will probably be some column-index counting problems in the future. Also, handling the diagnostics is a bit awkward, since it's a mandatory out-parameter of the parse functions now. The user must provide a valid diagnostics object that survives for the life of the parser.
This commit is contained in:
parent
3258e7fdb5
commit
01f98f9aff
@ -15,7 +15,16 @@ pub fn main() !void {
|
|||||||
var needfree = true;
|
var needfree = true;
|
||||||
defer if (needfree) allocator.free(data);
|
defer if (needfree) allocator.free(data);
|
||||||
|
|
||||||
const document = try nice.parseBuffer(allocator, data, .{});
|
var diagnostics = nice.Diagnostics{};
|
||||||
|
const document = nice.parseBuffer(allocator, data, &diagnostics, .{}) catch |err| {
|
||||||
|
std.debug.print("{s}:{d} col:{d}: {s}\n", .{
|
||||||
|
args[1],
|
||||||
|
diagnostics.row,
|
||||||
|
diagnostics.line_offset,
|
||||||
|
diagnostics.message,
|
||||||
|
});
|
||||||
|
return err;
|
||||||
|
};
|
||||||
defer document.deinit();
|
defer document.deinit();
|
||||||
|
|
||||||
// free data memory to ensure that the parsed document is not holding
|
// free data memory to ensure that the parsed document is not holding
|
||||||
|
@ -16,6 +16,7 @@ pub fn main() !void {
|
|||||||
defer file.close();
|
defer file.close();
|
||||||
var parser = try nice.StreamParser.init(allocator, .{});
|
var parser = try nice.StreamParser.init(allocator, .{});
|
||||||
defer parser.deinit();
|
defer parser.deinit();
|
||||||
|
errdefer parser.parse_state.document.deinit();
|
||||||
while (true) {
|
while (true) {
|
||||||
var buf = [_]u8{0} ** 1024;
|
var buf = [_]u8{0} ** 1024;
|
||||||
const len = try file.read(&buf);
|
const len = try file.read(&buf);
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
|
const Diagnostics = @import("./parser.zig").Diagnostics;
|
||||||
|
|
||||||
pub const IndexSlice = struct { start: usize, len: usize };
|
pub const IndexSlice = struct { start: usize, len: usize };
|
||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
@ -45,14 +47,15 @@ pub fn LineBuffer(comptime options: Strictness) type {
|
|||||||
|
|
||||||
pub const default_capacity: usize = 4096;
|
pub const default_capacity: usize = 4096;
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator) !@This() {
|
pub fn init(allocator: std.mem.Allocator, diagnostics: *Diagnostics) !@This() {
|
||||||
return initCapacity(allocator, default_capacity);
|
return initCapacity(allocator, diagnostics, default_capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) !@This() {
|
pub fn initCapacity(allocator: std.mem.Allocator, diagnostics: *Diagnostics, capacity: usize) !@This() {
|
||||||
return .{
|
return .{
|
||||||
.allocator = allocator,
|
.allocator = allocator,
|
||||||
.internal = .{
|
.internal = .{
|
||||||
|
.diagnostics = diagnostics,
|
||||||
.buffer = try allocator.alloc(u8, capacity),
|
.buffer = try allocator.alloc(u8, capacity),
|
||||||
.window = .{ .start = 0, .len = 0 },
|
.window = .{ .start = 0, .len = 0 },
|
||||||
},
|
},
|
||||||
@ -60,6 +63,10 @@ pub fn LineBuffer(comptime options: Strictness) type {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn diag(self: @This()) *Diagnostics {
|
||||||
|
return self.internal.diagnostics;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn empty(self: @This()) bool {
|
pub fn empty(self: @This()) bool {
|
||||||
return self.internal.empty();
|
return self.internal.empty();
|
||||||
}
|
}
|
||||||
@ -111,9 +118,18 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
return struct {
|
return struct {
|
||||||
buffer: []const u8,
|
buffer: []const u8,
|
||||||
window: IndexSlice,
|
window: IndexSlice,
|
||||||
|
diagnostics: *Diagnostics,
|
||||||
|
|
||||||
pub fn init(data: []const u8) @This() {
|
pub fn init(data: []const u8, diagnostics: *Diagnostics) @This() {
|
||||||
return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } };
|
return .{
|
||||||
|
.buffer = data,
|
||||||
|
.window = .{ .start = 0, .len = data.len },
|
||||||
|
.diagnostics = diagnostics,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn diag(self: @This()) *Diagnostics {
|
||||||
|
return self.diagnostics;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn empty(self: @This()) bool {
|
pub fn empty(self: @This()) bool {
|
||||||
@ -131,16 +147,33 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
const split: usize = split: {
|
const split: usize = split: {
|
||||||
for (window, 0..) |char, idx| {
|
for (window, 0..) |char, idx| {
|
||||||
if (comptime options.check_carriage_return)
|
if (comptime options.check_carriage_return)
|
||||||
if (char == '\r') return error.IllegalCarriageReturn;
|
if (char == '\r') {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found a carriage return";
|
||||||
|
return error.IllegalCarriageReturn;
|
||||||
|
};
|
||||||
|
|
||||||
if (comptime options.check_nonprinting_ascii)
|
if (comptime options.check_nonprinting_ascii)
|
||||||
if ((char != '\n' and char != '\t') and (char < ' ' or char == 0x7F))
|
if ((char != '\n' and char != '\t') and (char < ' ' or char == 0x7F)) {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found nonprinting ascii characters";
|
||||||
return error.IllegalNonprintingAscii;
|
return error.IllegalNonprintingAscii;
|
||||||
|
};
|
||||||
|
|
||||||
if (comptime options.check_trailing_whitespace) {
|
if (comptime options.check_trailing_whitespace) {
|
||||||
if (char == '\n') {
|
if (char == '\n') {
|
||||||
if (idx > 0 and (window[idx - 1] == ' ' or window[idx - 1] == '\t'))
|
if (idx > 0 and (window[idx - 1] == ' ' or window[idx - 1] == '\t')) {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found trailing spaces";
|
||||||
return error.IllegalTrailingSpace;
|
return error.IllegalTrailingSpace;
|
||||||
|
}
|
||||||
|
|
||||||
break :split idx;
|
break :split idx;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -150,12 +183,41 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = 0;
|
||||||
|
|
||||||
self.window.start += split + 1;
|
self.window.start += split + 1;
|
||||||
self.window.len -= split + 1;
|
self.window.len -= split + 1;
|
||||||
|
|
||||||
if (comptime options.validate_utf8) {
|
if (comptime options.validate_utf8) {
|
||||||
const line = window[0..split];
|
const line = window[0..split];
|
||||||
return if (std.unicode.utf8ValidateSlice(line)) line else error.InputIsNotValidUtf8;
|
|
||||||
|
var idx: usize = 0;
|
||||||
|
while (idx < line.len) {
|
||||||
|
if (std.unicode.utf8ByteSequenceLength(line[idx])) |cp_len| {
|
||||||
|
if (idx + cp_len > line.len) {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = cp_len;
|
||||||
|
self.diagnostics.message = "truncated UTF-8 sequence";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std.meta.isError(std.unicode.utf8Decode(line[idx .. idx + cp_len]))) {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = cp_len;
|
||||||
|
self.diagnostics.message = "invalid UTF-8 sequence";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
idx += cp_len;
|
||||||
|
} else |_| {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "invalid UTF-8 sequence start byte";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return line;
|
||||||
} else {
|
} else {
|
||||||
return window[0..split];
|
return window[0..split];
|
||||||
}
|
}
|
||||||
|
@ -68,3 +68,4 @@ pub const parseBuffer = parser.parseBuffer;
|
|||||||
pub const StreamParser = parser.StreamParser;
|
pub const StreamParser = parser.StreamParser;
|
||||||
pub const Document = parser.Document;
|
pub const Document = parser.Document;
|
||||||
pub const Value = parser.Value;
|
pub const Value = parser.Value;
|
||||||
|
pub const Diagnostics = parser.Diagnostics;
|
||||||
|
@ -8,14 +8,14 @@ pub const Value = @import("./parser/value.zig").Value;
|
|||||||
|
|
||||||
pub const Diagnostics = struct {
|
pub const Diagnostics = struct {
|
||||||
row: usize = 0,
|
row: usize = 0,
|
||||||
span: struct { absolute: usize = 0, line_offset: usize = 0, length: usize = 0 } = .{},
|
line_offset: usize = 0,
|
||||||
|
length: usize = 0,
|
||||||
message: []const u8 = "no problems",
|
message: []const u8 = "no problems",
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
UnexpectedIndent,
|
UnexpectedIndent,
|
||||||
UnexpectedValue,
|
UnexpectedValue,
|
||||||
ExtraContent,
|
|
||||||
EmptyDocument,
|
EmptyDocument,
|
||||||
DuplicateKey,
|
DuplicateKey,
|
||||||
BadMapEntry,
|
BadMapEntry,
|
||||||
@ -42,15 +42,13 @@ pub const Options = struct {
|
|||||||
default_object: enum { string, list, map, fail } = .fail,
|
default_object: enum { string, list, map, fail } = .fail,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, options: Options) !Document {
|
pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, diagnostics: *Diagnostics, options: Options) !Document {
|
||||||
var state = State.init(allocator);
|
var state = State.init(allocator, diagnostics);
|
||||||
defer state.deinit();
|
defer state.deinit();
|
||||||
errdefer state.document.deinit();
|
errdefer state.document.deinit();
|
||||||
|
|
||||||
var diagnostics = Diagnostics{};
|
|
||||||
var tok: tokenizer.LineTokenizer(buffers.ValidatingFixedLineBuffer) = .{
|
var tok: tokenizer.LineTokenizer(buffers.ValidatingFixedLineBuffer) = .{
|
||||||
.buffer = buffers.ValidatingFixedLineBuffer.init(buffer),
|
.buffer = buffers.ValidatingFixedLineBuffer.init(buffer, diagnostics),
|
||||||
.diagnostics = &diagnostics,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
while (try tok.next()) |line| try state.parseLine(line, options.duplicate_key_behavior);
|
while (try tok.next()) |line| try state.parseLine(line, options.duplicate_key_behavior);
|
||||||
@ -65,7 +63,6 @@ pub const StreamParser = struct {
|
|||||||
linetok: tokenizer.LineTokenizer(buffers.ValidatingLineBuffer),
|
linetok: tokenizer.LineTokenizer(buffers.ValidatingLineBuffer),
|
||||||
parse_state: State,
|
parse_state: State,
|
||||||
parse_options: Options = .{},
|
parse_options: Options = .{},
|
||||||
diagnostics: Diagnostics = .{},
|
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator, options: Options) !StreamParser {
|
pub fn init(allocator: std.mem.Allocator, options: Options) !StreamParser {
|
||||||
const diagnostics = try allocator.create(Diagnostics);
|
const diagnostics = try allocator.create(Diagnostics);
|
||||||
@ -74,16 +71,15 @@ pub const StreamParser = struct {
|
|||||||
|
|
||||||
return .{
|
return .{
|
||||||
.linetok = .{
|
.linetok = .{
|
||||||
.buffer = try buffers.ValidatingLineBuffer.init(allocator),
|
.buffer = try buffers.ValidatingLineBuffer.init(allocator, diagnostics),
|
||||||
.diagnostics = diagnostics,
|
|
||||||
},
|
},
|
||||||
.parse_state = State.init(allocator),
|
.parse_state = State.init(allocator, diagnostics),
|
||||||
.parse_options = options,
|
.parse_options = options,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: StreamParser) void {
|
pub fn deinit(self: StreamParser) void {
|
||||||
self.linetok.buffer.allocator.destroy(self.linetok.diagnostics);
|
self.linetok.buffer.allocator.destroy(self.parse_state.diagnostics);
|
||||||
self.linetok.buffer.deinit();
|
self.linetok.buffer.deinit();
|
||||||
self.parse_state.deinit();
|
self.parse_state.deinit();
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ const tokenizer = @import("../tokenizer.zig");
|
|||||||
const Error = @import("../parser.zig").Error;
|
const Error = @import("../parser.zig").Error;
|
||||||
const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
|
const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
|
||||||
const Options = @import("../parser.zig").Options;
|
const Options = @import("../parser.zig").Options;
|
||||||
|
const Diagnostics = @import("../parser.zig").Diagnostics;
|
||||||
const Value = @import("./value.zig").Value;
|
const Value = @import("./value.zig").Value;
|
||||||
|
|
||||||
pub const Document = struct {
|
pub const Document = struct {
|
||||||
@ -42,14 +43,16 @@ pub const State = struct {
|
|||||||
pub const Stack = std.ArrayList(*Value);
|
pub const Stack = std.ArrayList(*Value);
|
||||||
|
|
||||||
document: Document,
|
document: Document,
|
||||||
|
diagnostics: *Diagnostics,
|
||||||
value_stack: Stack,
|
value_stack: Stack,
|
||||||
mode: enum { initial, value, done } = .initial,
|
mode: enum { initial, value, done } = .initial,
|
||||||
expect_shift: tokenizer.ShiftDirection = .none,
|
expect_shift: tokenizer.ShiftDirection = .none,
|
||||||
dangling_key: ?[]const u8 = null,
|
dangling_key: ?[]const u8 = null,
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator) State {
|
pub fn init(allocator: std.mem.Allocator, diagnostics: *Diagnostics) State {
|
||||||
return .{
|
return .{
|
||||||
.document = Document.init(allocator),
|
.document = Document.init(allocator),
|
||||||
|
.diagnostics = diagnostics,
|
||||||
.value_stack = Stack.init(allocator),
|
.value_stack = Stack.init(allocator),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,10 @@ const Diagnostics = @import("./parser.zig").Diagnostics;
|
|||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
BadToken,
|
BadToken,
|
||||||
|
ExtraContent,
|
||||||
MixedIndentation,
|
MixedIndentation,
|
||||||
UnquantizedIndentation,
|
|
||||||
TooMuchIndentation,
|
TooMuchIndentation,
|
||||||
MissingNewline,
|
UnquantizedIndentation,
|
||||||
TrailingWhitespace,
|
TrailingWhitespace,
|
||||||
Impossible,
|
Impossible,
|
||||||
};
|
};
|
||||||
@ -60,18 +60,19 @@ pub const Line = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// buffer is expected to be either LineBuffer or FixedLineBuffer, but can
|
// buffer is expected to be either LineBuffer or FixedLineBuffer, but can
|
||||||
// technically be anything with a `nextLine` method
|
// technically be anything with a conformant interface.
|
||||||
pub fn LineTokenizer(comptime Buffer: type) type {
|
pub fn LineTokenizer(comptime Buffer: type) type {
|
||||||
return struct {
|
return struct {
|
||||||
buffer: Buffer,
|
buffer: Buffer,
|
||||||
index: usize = 0,
|
index: usize = 0,
|
||||||
indentation: DetectedIndentation = .unknown,
|
indentation: DetectedIndentation = .unknown,
|
||||||
last_indent: usize = 0,
|
last_indent: usize = 0,
|
||||||
diagnostics: *Diagnostics,
|
|
||||||
row: usize = 0,
|
|
||||||
|
|
||||||
pub fn finish(self: @This()) !void {
|
pub fn finish(self: @This()) !void {
|
||||||
if (!self.buffer.empty()) {
|
if (!self.buffer.empty()) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document has extra content or is missing the final LF character";
|
||||||
return error.ExtraContent;
|
return error.ExtraContent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -91,13 +92,23 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
// ugly documents.
|
// ugly documents.
|
||||||
.unknown => self.indentation = .{ .spaces = 0 },
|
.unknown => self.indentation = .{ .spaces = 0 },
|
||||||
.spaces => {},
|
.spaces => {},
|
||||||
.tabs => return error.MixedIndentation,
|
.tabs => {
|
||||||
|
self.buffer.diag().line_offset = idx;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document contains mixed tab/space indentation";
|
||||||
|
return error.MixedIndentation;
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'\t' => {
|
'\t' => {
|
||||||
switch (self.indentation) {
|
switch (self.indentation) {
|
||||||
.unknown => self.indentation = .tabs,
|
.unknown => self.indentation = .tabs,
|
||||||
.spaces => return error.MixedIndentation,
|
.spaces => {
|
||||||
|
self.buffer.diag().line_offset = idx;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document contains mixed tab/space indentation";
|
||||||
|
return error.MixedIndentation;
|
||||||
|
},
|
||||||
.tabs => {},
|
.tabs => {},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -110,7 +121,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (raw_line.len > 0) return error.TrailingWhitespace;
|
if (raw_line.len > 0) {
|
||||||
|
self.buffer.diag().line_offset = raw_line.len - 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
|
return error.TrailingWhitespace;
|
||||||
|
}
|
||||||
continue :lineloop;
|
continue :lineloop;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,15 +134,23 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
if (self.indentation.spaces == 0) {
|
if (self.indentation.spaces == 0) {
|
||||||
self.indentation.spaces = indent;
|
self.indentation.spaces = indent;
|
||||||
}
|
}
|
||||||
if (@rem(indent, self.indentation.spaces) != 0)
|
if (@rem(indent, self.indentation.spaces) != 0) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = indent;
|
||||||
|
self.buffer.diag().message = "this line contains incorrectly quantized indentation";
|
||||||
return error.UnquantizedIndentation;
|
return error.UnquantizedIndentation;
|
||||||
|
}
|
||||||
|
|
||||||
break :quant @divExact(indent, self.indentation.spaces);
|
break :quant @divExact(indent, self.indentation.spaces);
|
||||||
} else indent;
|
} else indent;
|
||||||
|
|
||||||
const shift: LineShift = if (quantized > self.last_indent) rel: {
|
const shift: LineShift = if (quantized > self.last_indent) rel: {
|
||||||
if ((quantized - self.last_indent) > 1)
|
if ((quantized - self.last_indent) > 1) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = indent;
|
||||||
|
self.buffer.diag().message = "this line contains too much indentation";
|
||||||
return error.TooMuchIndentation;
|
return error.TooMuchIndentation;
|
||||||
|
}
|
||||||
break :rel .indent;
|
break :rel .indent;
|
||||||
} else if (quantized < self.last_indent)
|
} else if (quantized < self.last_indent)
|
||||||
.{ .dedent = self.last_indent - quantized }
|
.{ .dedent = self.last_indent - quantized }
|
||||||
@ -134,10 +158,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.none;
|
.none;
|
||||||
|
|
||||||
defer {
|
defer {
|
||||||
self.row += 1;
|
|
||||||
self.last_indent = quantized;
|
self.last_indent = quantized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// update the diagnostics so that the parser can use them without
|
||||||
|
// knowing about the whitespace.
|
||||||
|
self.buffer.diag().line_offset = indent;
|
||||||
const line = raw_line[indent..];
|
const line = raw_line[indent..];
|
||||||
|
|
||||||
// this should not be possible, as empty lines are caught earlier.
|
// this should not be possible, as empty lines are caught earlier.
|
||||||
@ -147,7 +173,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
'#' => {
|
'#' => {
|
||||||
// force comments to be followed by a space. This makes them
|
// force comments to be followed by a space. This makes them
|
||||||
// behave the same way as strings, actually.
|
// behave the same way as strings, actually.
|
||||||
if (line.len > 1 and line[1] != ' ') return error.BadToken;
|
if (line.len > 1 and line[1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the start of comment character '#'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// simply lie about indentation when the line is a comment.
|
// simply lie about indentation when the line is a comment.
|
||||||
quantized = self.last_indent;
|
quantized = self.last_indent;
|
||||||
@ -160,12 +191,21 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
'|', '>', '[', '{' => {
|
'|', '>', '[', '{' => {
|
||||||
return .{
|
return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .in_line = try detectInlineItem(line) },
|
.contents = .{ .in_line = try self.detectInlineItem(line) },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
'-' => {
|
'-' => {
|
||||||
if (line.len > 1 and line[1] != ' ') return error.BadToken;
|
if (line.len > 1 and line[1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the list entry character '-'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
// blindly add 2 here because an empty item cannot fail in
|
||||||
|
// the value, only if a bogus dedent has occurred
|
||||||
|
self.buffer.diag().line_offset += 2;
|
||||||
|
|
||||||
return if (line.len == 1) .{
|
return if (line.len == 1) .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
@ -173,26 +213,33 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.raw = line,
|
.raw = line,
|
||||||
} else .{
|
} else .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .list_item = try detectInlineItem(line[2..]) },
|
.contents = .{ .list_item = try self.detectInlineItem(line[2..]) },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
else => {
|
else => {
|
||||||
for (line, 0..) |char, idx| {
|
for (line, 0..) |char, idx| {
|
||||||
if (char == ':') {
|
if (char == ':') {
|
||||||
|
self.buffer.diag().line_offset += idx + 2;
|
||||||
|
|
||||||
if (idx + 1 == line.len) return .{
|
if (idx + 1 == line.len) return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (line[idx + 1] != ' ') return error.BadToken;
|
if (line[idx + 1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += idx + 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the map key-value separator character ':'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .map_item = .{
|
.contents = .{ .map_item = .{
|
||||||
.key = line[0..idx],
|
.key = line[0..idx],
|
||||||
.val = try detectInlineItem(line[idx + 2 ..]),
|
.val = try self.detectInlineItem(line[idx + 2 ..]),
|
||||||
} },
|
} },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
@ -208,12 +255,16 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// somehow everything else has failed
|
// somehow everything else has failed
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = raw_line.len;
|
||||||
|
self.buffer.diag().message = "this document contains an unknown error. Please report this.";
|
||||||
return error.Impossible;
|
return error.Impossible;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn detectInlineItem(buf: []const u8) Error!InlineItem {
|
// TODO: it's impossible to get the right diagnostic offset in this function at the moment
|
||||||
|
fn detectInlineItem(self: @This(), buf: []const u8) Error!InlineItem {
|
||||||
if (buf.len == 0) return .empty;
|
if (buf.len == 0) return .empty;
|
||||||
|
|
||||||
switch (buf[0]) {
|
switch (buf[0]) {
|
||||||
@ -221,7 +272,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
||||||
|
|
||||||
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
||||||
' ', '\t' => return error.TrailingWhitespace,
|
' ', '\t' => {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
|
return error.TrailingWhitespace;
|
||||||
|
},
|
||||||
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
||||||
else => buf[@min(2, buf.len)..buf.len],
|
else => buf[@min(2, buf.len)..buf.len],
|
||||||
};
|
};
|
||||||
@ -232,22 +288,34 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.{ .space_string = slice };
|
.{ .space_string = slice };
|
||||||
},
|
},
|
||||||
'[' => {
|
'[' => {
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != ']')
|
if (buf.len < 2 or buf[buf.len - 1] != ']') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains a flow-style list but does not end with the closing character ']'";
|
||||||
return error.BadToken;
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// keep the closing ] for the flow parser
|
// keep the closing ] for the flow parser
|
||||||
return .{ .flow_list = buf[1..] };
|
return .{ .flow_list = buf[1..] };
|
||||||
},
|
},
|
||||||
'{' => {
|
'{' => {
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != '}')
|
if (buf.len < 2 or buf[buf.len - 1] != '}') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains a flow-style map but does not end with the closing character '}'";
|
||||||
return error.BadToken;
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// keep the closing } for the flow parser
|
// keep the closing } for the flow parser
|
||||||
return .{ .flow_map = buf[1..] };
|
return .{ .flow_map = buf[1..] };
|
||||||
},
|
},
|
||||||
else => {
|
else => {
|
||||||
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
|
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
return error.TrailingWhitespace;
|
return error.TrailingWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
return .{ .scalar = buf };
|
return .{ .scalar = buf };
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user