From 3258e7fdb5f78f808a8c9aa065c86dc73e59144c Mon Sep 17 00:00:00 2001 From: torque Date: Wed, 27 Sep 2023 23:35:24 -0700 Subject: [PATCH] tokenizer: add finish function to check if there is trailing data Since the tokenizer is decoupled from the parser, there's no good way for the parser to detect trailing data on its own. Also, without attempting to parse the last line, it's impossible to say whether it is junk data or simply missing a trailing newline. --- src/linebuffer.zig | 10 ++++++++++ src/parser.zig | 5 +++++ src/tokenizer.zig | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/src/linebuffer.zig b/src/linebuffer.zig index f48a265..c68d5b6 100644 --- a/src/linebuffer.zig +++ b/src/linebuffer.zig @@ -60,6 +60,10 @@ pub fn LineBuffer(comptime options: Strictness) type { }; } + pub fn empty(self: @This()) bool { + return self.internal.empty(); + } + pub fn deinit(self: @This()) void { self.allocator.free(self.internal.buffer); } @@ -112,6 +116,12 @@ pub fn FixedLineBuffer(comptime options: Strictness) type { return .{ .buffer = data, .window = .{ .start = 0, .len = data.len } }; } + pub fn empty(self: @This()) bool { + // we can't check the overall buffer size because the dynamic buffer may be + // overallocated + return self.window.len == 0; + } + pub fn nextLine(self: *@This()) !?[]const u8 { if (self.window.start >= self.buffer.len or self.window.len == 0) return null; diff --git a/src/parser.zig b/src/parser.zig index f7e0a1e..457c45d 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -54,6 +54,10 @@ pub fn parseBuffer(allocator: std.mem.Allocator, buffer: []const u8, options: Op }; while (try tok.next()) |line| try state.parseLine(line, options.duplicate_key_behavior); + // state doesn't have access to the tokenizer, which is the only thing that can + // error if unparsed lines remain in the buffer by the time that "finish" is + // called.
+ try tok.finish(); return try state.finish(options); } @@ -90,6 +94,7 @@ pub const StreamParser = struct { } pub fn finish(self: *StreamParser) !Document { + try self.linetok.finish(); return try self.parse_state.finish(self.parse_options); } }; diff --git a/src/tokenizer.zig b/src/tokenizer.zig index e52ba4a..14788e2 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -70,6 +70,12 @@ pub fn LineTokenizer(comptime Buffer: type) type { diagnostics: *Diagnostics, row: usize = 0, + pub fn finish(self: @This()) !void { + if (!self.buffer.empty()) { + return error.ExtraContent; + } + } + pub fn next(self: *@This()) !?Line { lineloop: while (try self.buffer.nextLine()) |raw_line| { var indent: usize = 0;