Compare commits
2 Commits
0e60719c85
...
01f98f9aff
Author | SHA1 | Date | |
---|---|---|---|
01f98f9aff | |||
3258e7fdb5 |
@ -15,7 +15,16 @@ pub fn main() !void {
|
|||||||
var needfree = true;
|
var needfree = true;
|
||||||
defer if (needfree) allocator.free(data);
|
defer if (needfree) allocator.free(data);
|
||||||
|
|
||||||
const document = try nice.parseBuffer(allocator, data, .{});
|
var diagnostics = nice.Diagnostics{};
|
||||||
|
const document = nice.parseBuffer(allocator, data, &diagnostics, .{}) catch |err| {
|
||||||
|
std.debug.print("{s}:{d} col:{d}: {s}\n", .{
|
||||||
|
args[1],
|
||||||
|
diagnostics.row,
|
||||||
|
diagnostics.line_offset,
|
||||||
|
diagnostics.message,
|
||||||
|
});
|
||||||
|
return err;
|
||||||
|
};
|
||||||
defer document.deinit();
|
defer document.deinit();
|
||||||
|
|
||||||
// free data memory to ensure that the parsed document is not holding
|
// free data memory to ensure that the parsed document is not holding
|
||||||
|
@ -16,6 +16,7 @@ pub fn main() !void {
|
|||||||
defer file.close();
|
defer file.close();
|
||||||
var parser = try nice.StreamParser.init(allocator, .{});
|
var parser = try nice.StreamParser.init(allocator, .{});
|
||||||
defer parser.deinit();
|
defer parser.deinit();
|
||||||
|
errdefer parser.parse_state.document.deinit();
|
||||||
while (true) {
|
while (true) {
|
||||||
var buf = [_]u8{0} ** 1024;
|
var buf = [_]u8{0} ** 1024;
|
||||||
const len = try file.read(&buf);
|
const len = try file.read(&buf);
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
|
const Diagnostics = @import("./parser.zig").Diagnostics;
|
||||||
|
|
||||||
pub const IndexSlice = struct { start: usize, len: usize };
|
pub const IndexSlice = struct { start: usize, len: usize };
|
||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
@ -45,14 +47,15 @@ pub fn LineBuffer(comptime options: Strictness) type {
|
|||||||
|
|
||||||
pub const default_capacity: usize = 4096;
|
pub const default_capacity: usize = 4096;
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator) !@This() {
|
pub fn init(allocator: std.mem.Allocator, diagnostics: *Diagnostics) !@This() {
|
||||||
return initCapacity(allocator, default_capacity);
|
return initCapacity(allocator, diagnostics, default_capacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initCapacity(allocator: std.mem.Allocator, capacity: usize) !@This() {
|
pub fn initCapacity(allocator: std.mem.Allocator, diagnostics: *Diagnostics, capacity: usize) !@This() {
|
||||||
return .{
|
return .{
|
||||||
.allocator = allocator,
|
.allocator = allocator,
|
||||||
.internal = .{
|
.internal = .{
|
||||||
|
.diagnostics = diagnostics,
|
||||||
.buffer = try allocator.alloc(u8, capacity),
|
.buffer = try allocator.alloc(u8, capacity),
|
||||||
.window = .{ .start = 0, .len = 0 },
|
.window = .{ .start = 0, .len = 0 },
|
||||||
},
|
},
|
||||||
@ -60,6 +63,14 @@ pub fn LineBuffer(comptime options: Strictness) type {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn diag(self: @This()) *Diagnostics {
|
||||||
|
return self.internal.diagnostics;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn empty(self: @This()) bool {
|
||||||
|
return self.internal.empty();
|
||||||
|
}
|
||||||
|
|
||||||
pub fn deinit(self: @This()) void {
|
pub fn deinit(self: @This()) void {
|
||||||
self.allocator.free(self.internal.buffer);
|
self.allocator.free(self.internal.buffer);
|
||||||
}
|
}
|
||||||
@ -107,9 +118,24 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
return struct {
|
return struct {
|
||||||
buffer: []const u8,
|
buffer: []const u8,
|
||||||
window: IndexSlice,
|
window: IndexSlice,
|
||||||
|
diagnostics: *Diagnostics,
|
||||||
|
|
||||||
pub fn init(data: []const u8) @This() {
|
pub fn init(data: []const u8, diagnostics: *Diagnostics) @This() {
|
||||||
return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } };
|
return .{
|
||||||
|
.buffer = data,
|
||||||
|
.window = .{ .start = 0, .len = data.len },
|
||||||
|
.diagnostics = diagnostics,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn diag(self: @This()) *Diagnostics {
|
||||||
|
return self.diagnostics;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn empty(self: @This()) bool {
|
||||||
|
// we can't check the overall buffer size because the dynamic buffer may be
|
||||||
|
// overallocated
|
||||||
|
return self.window.len == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn nextLine(self: *@This()) !?[]const u8 {
|
pub fn nextLine(self: *@This()) !?[]const u8 {
|
||||||
@ -121,16 +147,33 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
const split: usize = split: {
|
const split: usize = split: {
|
||||||
for (window, 0..) |char, idx| {
|
for (window, 0..) |char, idx| {
|
||||||
if (comptime options.check_carriage_return)
|
if (comptime options.check_carriage_return)
|
||||||
if (char == '\r') return error.IllegalCarriageReturn;
|
if (char == '\r') {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found a carriage return";
|
||||||
|
return error.IllegalCarriageReturn;
|
||||||
|
};
|
||||||
|
|
||||||
if (comptime options.check_nonprinting_ascii)
|
if (comptime options.check_nonprinting_ascii)
|
||||||
if ((char != '\n' and char != '\t') and (char < ' ' or char == 0x7F))
|
if ((char != '\n' and char != '\t') and (char < ' ' or char == 0x7F)) {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found nonprinting ascii characters";
|
||||||
return error.IllegalNonprintingAscii;
|
return error.IllegalNonprintingAscii;
|
||||||
|
};
|
||||||
|
|
||||||
if (comptime options.check_trailing_whitespace) {
|
if (comptime options.check_trailing_whitespace) {
|
||||||
if (char == '\n') {
|
if (char == '\n') {
|
||||||
if (idx > 0 and (window[idx - 1] == ' ' or window[idx - 1] == '\t'))
|
if (idx > 0 and (window[idx - 1] == ' ' or window[idx - 1] == '\t')) {
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "found trailing spaces";
|
||||||
return error.IllegalTrailingSpace;
|
return error.IllegalTrailingSpace;
|
||||||
|
}
|
||||||
|
|
||||||
break :split idx;
|
break :split idx;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -140,12 +183,41 @@ pub fn FixedLineBuffer(comptime options: Strictness) type {
|
|||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.diagnostics.row += 1;
|
||||||
|
self.diagnostics.line_offset = 0;
|
||||||
|
|
||||||
self.window.start += split + 1;
|
self.window.start += split + 1;
|
||||||
self.window.len -= split + 1;
|
self.window.len -= split + 1;
|
||||||
|
|
||||||
if (comptime options.validate_utf8) {
|
if (comptime options.validate_utf8) {
|
||||||
const line = window[0..split];
|
const line = window[0..split];
|
||||||
return if (std.unicode.utf8ValidateSlice(line)) line else error.InputIsNotValidUtf8;
|
|
||||||
|
var idx: usize = 0;
|
||||||
|
while (idx < line.len) {
|
||||||
|
if (std.unicode.utf8ByteSequenceLength(line[idx])) |cp_len| {
|
||||||
|
if (idx + cp_len > line.len) {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = cp_len;
|
||||||
|
self.diagnostics.message = "truncated UTF-8 sequence";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std.meta.isError(std.unicode.utf8Decode(line[idx .. idx + cp_len]))) {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = cp_len;
|
||||||
|
self.diagnostics.message = "invalid UTF-8 sequence";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
idx += cp_len;
|
||||||
|
} else |_| {
|
||||||
|
self.diagnostics.line_offset = idx;
|
||||||
|
self.diagnostics.length = 1;
|
||||||
|
self.diagnostics.message = "invalid UTF-8 sequence start byte";
|
||||||
|
return error.InputIsNotValidUtf8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return line;
|
||||||
} else {
|
} else {
|
||||||
return window[0..split];
|
return window[0..split];
|
||||||
}
|
}
|
||||||
|
@ -68,3 +68,4 @@ pub const parseBuffer = parser.parseBuffer;
|
|||||||
pub const StreamParser = parser.StreamParser;
|
pub const StreamParser = parser.StreamParser;
|
||||||
pub const Document = parser.Document;
|
pub const Document = parser.Document;
|
||||||
pub const Value = parser.Value;
|
pub const Value = parser.Value;
|
||||||
|
pub const Diagnostics = parser.Diagnostics;
|
||||||
|
@ -8,14 +8,14 @@ pub const Value = @import("./parser/value.zig").Value;
|
|||||||
|
|
||||||
pub const Diagnostics = struct {
|
pub const Diagnostics = struct {
|
||||||
row: usize = 0,
|
row: usize = 0,
|
||||||
span: struct { absolute: usize = 0, line_offset: usize = 0, length: usize = 0 } = .{},
|
line_offset: usize = 0,
|
||||||
|
length: usize = 0,
|
||||||
message: []const u8 = "no problems",
|
message: []const u8 = "no problems",
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
UnexpectedIndent,
|
UnexpectedIndent,
|
||||||
UnexpectedValue,
|
UnexpectedValue,
|
||||||
ExtraContent,
|
|
||||||
EmptyDocument,
|
EmptyDocument,
|
||||||
DuplicateKey,
|
DuplicateKey,
|
||||||
BadMapEntry,
|
BadMapEntry,
|
||||||
@ -42,18 +42,20 @@ pub const Options = struct {
|
|||||||
default_object: enum { string, list, map, fail } = .fail,
|
default_object: enum { string, list, map, fail } = .fail,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, options: Options) !Document {
|
pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, diagnostics: *Diagnostics, options: Options) !Document {
|
||||||
var state = State.init(allocator);
|
var state = State.init(allocator, diagnostics);
|
||||||
defer state.deinit();
|
defer state.deinit();
|
||||||
errdefer state.document.deinit();
|
errdefer state.document.deinit();
|
||||||
|
|
||||||
var diagnostics = Diagnostics{};
|
|
||||||
var tok: tokenizer.LineTokenizer(buffers.ValidatingFixedLineBuffer) = .{
|
var tok: tokenizer.LineTokenizer(buffers.ValidatingFixedLineBuffer) = .{
|
||||||
.buffer = buffers.ValidatingFixedLineBuffer.init(buffer),
|
.buffer = buffers.ValidatingFixedLineBuffer.init(buffer, diagnostics),
|
||||||
.diagnostics = &diagnostics,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
while (try tok.next()) |line| try state.parseLine(line, options.duplicate_key_behavior);
|
while (try tok.next()) |line| try state.parseLine(line, options.duplicate_key_behavior);
|
||||||
|
// state doesn't have access to the tokenizer, which is the only thing that can
|
||||||
|
// error if unparsed lines remain in the buffer by the time that "finish" is
|
||||||
|
// called.
|
||||||
|
try tok.finish();
|
||||||
return try state.finish(options);
|
return try state.finish(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,7 +63,6 @@ pub const StreamParser = struct {
|
|||||||
linetok: tokenizer.LineTokenizer(buffers.ValidatingLineBuffer),
|
linetok: tokenizer.LineTokenizer(buffers.ValidatingLineBuffer),
|
||||||
parse_state: State,
|
parse_state: State,
|
||||||
parse_options: Options = .{},
|
parse_options: Options = .{},
|
||||||
diagnostics: Diagnostics = .{},
|
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator, options: Options) !StreamParser {
|
pub fn init(allocator: std.mem.Allocator, options: Options) !StreamParser {
|
||||||
const diagnostics = try allocator.create(Diagnostics);
|
const diagnostics = try allocator.create(Diagnostics);
|
||||||
@ -70,16 +71,15 @@ pub const StreamParser = struct {
|
|||||||
|
|
||||||
return .{
|
return .{
|
||||||
.linetok = .{
|
.linetok = .{
|
||||||
.buffer = try buffers.ValidatingLineBuffer.init(allocator),
|
.buffer = try buffers.ValidatingLineBuffer.init(allocator, diagnostics),
|
||||||
.diagnostics = diagnostics,
|
|
||||||
},
|
},
|
||||||
.parse_state = State.init(allocator),
|
.parse_state = State.init(allocator, diagnostics),
|
||||||
.parse_options = options,
|
.parse_options = options,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: StreamParser) void {
|
pub fn deinit(self: StreamParser) void {
|
||||||
self.linetok.buffer.allocator.destroy(self.linetok.diagnostics);
|
self.linetok.buffer.allocator.destroy(self.parse_state.diagnostics);
|
||||||
self.linetok.buffer.deinit();
|
self.linetok.buffer.deinit();
|
||||||
self.parse_state.deinit();
|
self.parse_state.deinit();
|
||||||
}
|
}
|
||||||
@ -90,6 +90,7 @@ pub const StreamParser = struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn finish(self: *StreamParser) !Document {
|
pub fn finish(self: *StreamParser) !Document {
|
||||||
|
try self.linetok.finish();
|
||||||
return try self.parse_state.finish(self.parse_options);
|
return try self.parse_state.finish(self.parse_options);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -4,6 +4,7 @@ const tokenizer = @import("../tokenizer.zig");
|
|||||||
const Error = @import("../parser.zig").Error;
|
const Error = @import("../parser.zig").Error;
|
||||||
const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
|
const DuplicateKeyBehavior = @import("../parser.zig").DuplicateKeyBehavior;
|
||||||
const Options = @import("../parser.zig").Options;
|
const Options = @import("../parser.zig").Options;
|
||||||
|
const Diagnostics = @import("../parser.zig").Diagnostics;
|
||||||
const Value = @import("./value.zig").Value;
|
const Value = @import("./value.zig").Value;
|
||||||
|
|
||||||
pub const Document = struct {
|
pub const Document = struct {
|
||||||
@ -42,14 +43,16 @@ pub const State = struct {
|
|||||||
pub const Stack = std.ArrayList(*Value);
|
pub const Stack = std.ArrayList(*Value);
|
||||||
|
|
||||||
document: Document,
|
document: Document,
|
||||||
|
diagnostics: *Diagnostics,
|
||||||
value_stack: Stack,
|
value_stack: Stack,
|
||||||
mode: enum { initial, value, done } = .initial,
|
mode: enum { initial, value, done } = .initial,
|
||||||
expect_shift: tokenizer.ShiftDirection = .none,
|
expect_shift: tokenizer.ShiftDirection = .none,
|
||||||
dangling_key: ?[]const u8 = null,
|
dangling_key: ?[]const u8 = null,
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator) State {
|
pub fn init(allocator: std.mem.Allocator, diagnostics: *Diagnostics) State {
|
||||||
return .{
|
return .{
|
||||||
.document = Document.init(allocator),
|
.document = Document.init(allocator),
|
||||||
|
.diagnostics = diagnostics,
|
||||||
.value_stack = Stack.init(allocator),
|
.value_stack = Stack.init(allocator),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,10 @@ const Diagnostics = @import("./parser.zig").Diagnostics;
|
|||||||
|
|
||||||
pub const Error = error{
|
pub const Error = error{
|
||||||
BadToken,
|
BadToken,
|
||||||
|
ExtraContent,
|
||||||
MixedIndentation,
|
MixedIndentation,
|
||||||
UnquantizedIndentation,
|
|
||||||
TooMuchIndentation,
|
TooMuchIndentation,
|
||||||
MissingNewline,
|
UnquantizedIndentation,
|
||||||
TrailingWhitespace,
|
TrailingWhitespace,
|
||||||
Impossible,
|
Impossible,
|
||||||
};
|
};
|
||||||
@ -60,15 +60,22 @@ pub const Line = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// buffer is expected to be either LineBuffer or FixedLineBuffer, but can
|
// buffer is expected to be either LineBuffer or FixedLineBuffer, but can
|
||||||
// technically be anything with a `nextLine` method
|
// technically be anything with a conformant interface.
|
||||||
pub fn LineTokenizer(comptime Buffer: type) type {
|
pub fn LineTokenizer(comptime Buffer: type) type {
|
||||||
return struct {
|
return struct {
|
||||||
buffer: Buffer,
|
buffer: Buffer,
|
||||||
index: usize = 0,
|
index: usize = 0,
|
||||||
indentation: DetectedIndentation = .unknown,
|
indentation: DetectedIndentation = .unknown,
|
||||||
last_indent: usize = 0,
|
last_indent: usize = 0,
|
||||||
diagnostics: *Diagnostics,
|
|
||||||
row: usize = 0,
|
pub fn finish(self: @This()) !void {
|
||||||
|
if (!self.buffer.empty()) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document has extra content or is missing the final LF character";
|
||||||
|
return error.ExtraContent;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn next(self: *@This()) !?Line {
|
pub fn next(self: *@This()) !?Line {
|
||||||
lineloop: while (try self.buffer.nextLine()) |raw_line| {
|
lineloop: while (try self.buffer.nextLine()) |raw_line| {
|
||||||
@ -85,13 +92,23 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
// ugly documents.
|
// ugly documents.
|
||||||
.unknown => self.indentation = .{ .spaces = 0 },
|
.unknown => self.indentation = .{ .spaces = 0 },
|
||||||
.spaces => {},
|
.spaces => {},
|
||||||
.tabs => return error.MixedIndentation,
|
.tabs => {
|
||||||
|
self.buffer.diag().line_offset = idx;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document contains mixed tab/space indentation";
|
||||||
|
return error.MixedIndentation;
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'\t' => {
|
'\t' => {
|
||||||
switch (self.indentation) {
|
switch (self.indentation) {
|
||||||
.unknown => self.indentation = .tabs,
|
.unknown => self.indentation = .tabs,
|
||||||
.spaces => return error.MixedIndentation,
|
.spaces => {
|
||||||
|
self.buffer.diag().line_offset = idx;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "the document contains mixed tab/space indentation";
|
||||||
|
return error.MixedIndentation;
|
||||||
|
},
|
||||||
.tabs => {},
|
.tabs => {},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -104,7 +121,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (raw_line.len > 0) return error.TrailingWhitespace;
|
if (raw_line.len > 0) {
|
||||||
|
self.buffer.diag().line_offset = raw_line.len - 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
|
return error.TrailingWhitespace;
|
||||||
|
}
|
||||||
continue :lineloop;
|
continue :lineloop;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -112,15 +134,23 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
if (self.indentation.spaces == 0) {
|
if (self.indentation.spaces == 0) {
|
||||||
self.indentation.spaces = indent;
|
self.indentation.spaces = indent;
|
||||||
}
|
}
|
||||||
if (@rem(indent, self.indentation.spaces) != 0)
|
if (@rem(indent, self.indentation.spaces) != 0) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = indent;
|
||||||
|
self.buffer.diag().message = "this line contains incorrectly quantized indentation";
|
||||||
return error.UnquantizedIndentation;
|
return error.UnquantizedIndentation;
|
||||||
|
}
|
||||||
|
|
||||||
break :quant @divExact(indent, self.indentation.spaces);
|
break :quant @divExact(indent, self.indentation.spaces);
|
||||||
} else indent;
|
} else indent;
|
||||||
|
|
||||||
const shift: LineShift = if (quantized > self.last_indent) rel: {
|
const shift: LineShift = if (quantized > self.last_indent) rel: {
|
||||||
if ((quantized - self.last_indent) > 1)
|
if ((quantized - self.last_indent) > 1) {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = indent;
|
||||||
|
self.buffer.diag().message = "this line contains too much indentation";
|
||||||
return error.TooMuchIndentation;
|
return error.TooMuchIndentation;
|
||||||
|
}
|
||||||
break :rel .indent;
|
break :rel .indent;
|
||||||
} else if (quantized < self.last_indent)
|
} else if (quantized < self.last_indent)
|
||||||
.{ .dedent = self.last_indent - quantized }
|
.{ .dedent = self.last_indent - quantized }
|
||||||
@ -128,10 +158,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.none;
|
.none;
|
||||||
|
|
||||||
defer {
|
defer {
|
||||||
self.row += 1;
|
|
||||||
self.last_indent = quantized;
|
self.last_indent = quantized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// update the diagnostics so that the parser can use them without
|
||||||
|
// knowing about the whitespace.
|
||||||
|
self.buffer.diag().line_offset = indent;
|
||||||
const line = raw_line[indent..];
|
const line = raw_line[indent..];
|
||||||
|
|
||||||
// this should not be possible, as empty lines are caught earlier.
|
// this should not be possible, as empty lines are caught earlier.
|
||||||
@ -141,7 +173,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
'#' => {
|
'#' => {
|
||||||
// force comments to be followed by a space. This makes them
|
// force comments to be followed by a space. This makes them
|
||||||
// behave the same way as strings, actually.
|
// behave the same way as strings, actually.
|
||||||
if (line.len > 1 and line[1] != ' ') return error.BadToken;
|
if (line.len > 1 and line[1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the start of comment character '#'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// simply lie about indentation when the line is a comment.
|
// simply lie about indentation when the line is a comment.
|
||||||
quantized = self.last_indent;
|
quantized = self.last_indent;
|
||||||
@ -154,12 +191,21 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
'|', '>', '[', '{' => {
|
'|', '>', '[', '{' => {
|
||||||
return .{
|
return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .in_line = try detectInlineItem(line) },
|
.contents = .{ .in_line = try self.detectInlineItem(line) },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
'-' => {
|
'-' => {
|
||||||
if (line.len > 1 and line[1] != ' ') return error.BadToken;
|
if (line.len > 1 and line[1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the list entry character '-'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
// blindly add 2 here because an empty item cannot fail in
|
||||||
|
// the value, only if a bogus dedent has occurred
|
||||||
|
self.buffer.diag().line_offset += 2;
|
||||||
|
|
||||||
return if (line.len == 1) .{
|
return if (line.len == 1) .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
@ -167,26 +213,33 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.raw = line,
|
.raw = line,
|
||||||
} else .{
|
} else .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .list_item = try detectInlineItem(line[2..]) },
|
.contents = .{ .list_item = try self.detectInlineItem(line[2..]) },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
else => {
|
else => {
|
||||||
for (line, 0..) |char, idx| {
|
for (line, 0..) |char, idx| {
|
||||||
if (char == ':') {
|
if (char == ':') {
|
||||||
|
self.buffer.diag().line_offset += idx + 2;
|
||||||
|
|
||||||
if (idx + 1 == line.len) return .{
|
if (idx + 1 == line.len) return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (line[idx + 1] != ' ') return error.BadToken;
|
if (line[idx + 1] != ' ') {
|
||||||
|
self.buffer.diag().line_offset += idx + 1;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line is missing a space after the map key-value separator character ':'";
|
||||||
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.shift = shift,
|
.shift = shift,
|
||||||
.contents = .{ .map_item = .{
|
.contents = .{ .map_item = .{
|
||||||
.key = line[0..idx],
|
.key = line[0..idx],
|
||||||
.val = try detectInlineItem(line[idx + 2 ..]),
|
.val = try self.detectInlineItem(line[idx + 2 ..]),
|
||||||
} },
|
} },
|
||||||
.raw = line,
|
.raw = line,
|
||||||
};
|
};
|
||||||
@ -202,12 +255,16 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// somehow everything else has failed
|
// somehow everything else has failed
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = raw_line.len;
|
||||||
|
self.buffer.diag().message = "this document contains an unknown error. Please report this.";
|
||||||
return error.Impossible;
|
return error.Impossible;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn detectInlineItem(buf: []const u8) Error!InlineItem {
|
// TODO: it's impossible to get the right diagnostic offset in this function at the moment
|
||||||
|
fn detectInlineItem(self: @This(), buf: []const u8) Error!InlineItem {
|
||||||
if (buf.len == 0) return .empty;
|
if (buf.len == 0) return .empty;
|
||||||
|
|
||||||
switch (buf[0]) {
|
switch (buf[0]) {
|
||||||
@ -215,7 +272,12 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
||||||
|
|
||||||
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
||||||
' ', '\t' => return error.TrailingWhitespace,
|
' ', '\t' => {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
|
return error.TrailingWhitespace;
|
||||||
|
},
|
||||||
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
||||||
else => buf[@min(2, buf.len)..buf.len],
|
else => buf[@min(2, buf.len)..buf.len],
|
||||||
};
|
};
|
||||||
@ -226,22 +288,34 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
|||||||
.{ .space_string = slice };
|
.{ .space_string = slice };
|
||||||
},
|
},
|
||||||
'[' => {
|
'[' => {
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != ']')
|
if (buf.len < 2 or buf[buf.len - 1] != ']') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains a flow-style list but does not end with the closing character ']'";
|
||||||
return error.BadToken;
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// keep the closing ] for the flow parser
|
// keep the closing ] for the flow parser
|
||||||
return .{ .flow_list = buf[1..] };
|
return .{ .flow_list = buf[1..] };
|
||||||
},
|
},
|
||||||
'{' => {
|
'{' => {
|
||||||
if (buf.len < 2 or buf[buf.len - 1] != '}')
|
if (buf.len < 2 or buf[buf.len - 1] != '}') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains a flow-style map but does not end with the closing character '}'";
|
||||||
return error.BadToken;
|
return error.BadToken;
|
||||||
|
}
|
||||||
|
|
||||||
// keep the closing } fpr the flow parser
|
// keep the closing } fpr the flow parser
|
||||||
return .{ .flow_map = buf[1..] };
|
return .{ .flow_map = buf[1..] };
|
||||||
},
|
},
|
||||||
else => {
|
else => {
|
||||||
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t')
|
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t') {
|
||||||
|
self.buffer.diag().line_offset = 0;
|
||||||
|
self.buffer.diag().length = 1;
|
||||||
|
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||||
return error.TrailingWhitespace;
|
return error.TrailingWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
return .{ .scalar = buf };
|
return .{ .scalar = buf };
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user