diff --git a/src/parser/state.zig b/src/parser/state.zig index cbac552..29afa0e 100644 --- a/src/parser/state.zig +++ b/src/parser/state.zig @@ -452,10 +452,10 @@ pub const State = struct { charloop: for (contents, 0..) |char, idx| { switch (pstate) { .want_list_item => switch (char) { - ' ', '\t' => continue :charloop, + ' ' => continue :charloop, + '\t' => return error.IllegalTabWhitespaceInLine, ',' => { // empty value - // don't check for whitespace here: [ , ] is okay, as is [ , , ] const tip = try state.getStackTip(); try tip.flow_list.append(Value.newScalar(arena_alloc)); item_start = idx + 1; @@ -500,35 +500,33 @@ pub const State = struct { }, }, .consuming_list_item => switch (char) { - // consider: detecting trailing whitespace. "[ 1 ]" should - // produce "1" and not "1 " as it currently does, which breaks - // the principle of least astonishment. design: no trailing - // whitespace before "," and only a single space is allowed before "]" ',' => { - if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') { - state.diagnostics.length = 1; - state.diagnostics.message = "the flow list contains whitespace before ,"; - return error.TrailingWhitespace; - } + const end = end: { + var countup = @max(idx, 1) - 1; + while (countup > 0) : (countup -= 1) { + if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine; + if (contents[countup] != ' ') break :end countup + 1; + } + break :end countup; + }; const tip = try state.getStackTip(); try tip.flow_list.append( - try Value.fromScalar(arena_alloc, contents[item_start..idx]), + try Value.fromScalar(arena_alloc, contents[item_start..end]), ); item_start = idx + 1; pstate = .want_list_item; }, ']' => { - var end = idx; - if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') { - if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) { - state.diagnostics.length = 1; - state.diagnostics.message = "the flow list contains extra whitespace before ]"; - return error.TrailingWhitespace; + const end = end: { + var countup = @max(idx, 1) - 1; + while (countup > 0) : (countup -= 1) { + if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine; + if (contents[countup] != ' ') break :end countup + 1; } - end = idx - 1; - } + break :end countup; + }; const finished = state.value_stack.getLastOrNull() orelse { state.diagnostics.length = 1; @@ -543,7 +541,8 @@ pub const State = struct { else => continue :charloop, }, .want_list_separator => switch (char) { - ' ', '\t' => continue :charloop, + ' ' => continue :charloop, + '\t' => return error.IllegalTabWhitespaceInLine, ',' => { item_start = idx; pstate = .want_list_item; @@ -556,7 +555,8 @@ pub const State = struct { }, }, .want_map_key => switch (char) { - ' ', '\t' => continue :charloop, + ' ' => continue :charloop, + '\t' => return error.IllegalTabWhitespaceInLine, // forbid these characters so that flow dictionary keys cannot start // with characters that regular dictionary keys cannot start with // (even though they're unambiguous in this specific context). @@ -578,18 +578,22 @@ pub const State = struct { }, .consuming_map_key => switch (char) { ':' => { - if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') { - state.diagnostics.length = 1; - state.diagnostics.message = "the flow map contains whitespace before :"; - return error.TrailingWhitespace; - } - dangling_key = try arena_alloc.dupe(u8, contents[item_start..idx]); + const end = end: { + var countup = @max(idx, 1) - 1; + while (countup > 0) : (countup -= 1) { + if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine; + if (contents[countup] != ' ') break :end countup + 1; + } + break :end countup; + }; + dangling_key = try arena_alloc.dupe(u8, contents[item_start..end]); pstate = .want_map_value; }, else => continue :charloop, }, .want_map_value => switch (char) { - ' ', '\t' => continue :charloop, + ' ' => continue :charloop, + '\t' => return error.IllegalTabWhitespaceInLine, ',' => { const tip = try state.getStackTip(); try state.putMap( @@ -651,31 +655,34 @@ pub const State = struct { }, .consuming_map_value => switch (char) { ',' => { - if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') { - state.diagnostics.length = 1; - state.diagnostics.message = "the flow map contains whitespace before ,"; - return error.TrailingWhitespace; - } + const end = end: { + var countup = @max(idx, 1) - 1; + while (countup > 0) : (countup -= 1) { + if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine; + if (contents[countup] != ' ') break :end countup + 1; + } + break :end countup; + }; + const tip = try state.getStackTip(); try state.putMap( &tip.flow_map, dangling_key.?, - try Value.fromScalar(arena_alloc, contents[item_start..idx]), + try Value.fromScalar(arena_alloc, contents[item_start..end]), dkb, ); dangling_key = null; pstate = .want_map_key; }, '}' => { - var end = idx; - if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') { - if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) { - state.diagnostics.length = 1; - state.diagnostics.message = "the flow map contains extra whitespace before }"; - return error.TrailingWhitespace; + const end = end: { + var countup = @max(idx, 1) - 1; + while (countup > 0) : (countup -= 1) { + if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine; + if (contents[countup] != ' ') break :end countup + 1; } - end = idx - 1; - } + break :end countup; + }; const tip = try state.getStackTip(); try state.putMap( @@ -690,7 +697,8 @@ pub const State = struct { else => continue :charloop, }, .want_map_separator => switch (char) { - ' ', '\t' => continue :charloop, + ' ' => continue :charloop, + '\t' => return error.IllegalTabWhitespaceInLine, ',' => pstate = .want_map_key, '}' => pstate = try state.popFlowStack(), else => return { diff --git a/src/tokenizer.zig b/src/tokenizer.zig index 1379260..819603f 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -9,6 +9,7 @@ pub const Error = error{ TooMuchIndentation, UnquantizedIndentation, TrailingWhitespace, + IllegalTabWhitespaceInLine, Impossible, }; @@ -220,13 +221,21 @@ pub fn LineTokenizer(comptime Buffer: type) type { else => { for (line, 0..) |char, idx| { if (char == ':') { - self.buffer.diag().line_offset += idx + 2; + if (idx > 0 and (line[idx - 1] == ' ' or line[idx - 1] == '\t')) { + self.buffer.diag().line_offset += idx - 1; + self.buffer.diag().length = 1; + self.buffer.diag().message = "this line contains space before the map key-value separator character ':'"; + return error.TrailingWhitespace; + } - if (idx + 1 == line.len) return .{ - .shift = shift, - .contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } }, - .raw = line, - }; + if (idx + 1 == line.len) { + self.buffer.diag().line_offset += idx + 1; + return .{ + .shift = shift, + .contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } }, + .raw = line, + }; + } if (line[idx + 1] != ' ') { self.buffer.diag().line_offset += idx + 1; @@ -267,9 +276,21 @@ pub fn LineTokenizer(comptime Buffer: type) type { fn detectInlineItem(self: @This(), buf: []const u8) Error!InlineItem { if (buf.len == 0) return .empty; - switch (buf[0]) { + const start = start: { + for (buf, 0..) |chr, idx| + if (chr == ' ') + continue + else if (chr == '\t') + return error.IllegalTabWhitespaceInLine + else + break :start idx; + + return error.TrailingWhitespace; + }; + + switch (buf[start]) { '>', '|' => |char| { - if (buf.len > 1 and buf[1] != ' ') return error.BadToken; + if (buf.len - start > 1 and buf[start + 1] != ' ') return error.BadToken; const slice: []const u8 = switch (buf[buf.len - 1]) { ' ', '\t' => { @@ -278,8 +299,8 @@ pub fn LineTokenizer(comptime Buffer: type) type { self.buffer.diag().message = "this line contains trailing whitespace"; return error.TrailingWhitespace; }, - '|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)], - else => buf[@min(2, buf.len)..buf.len], + '|' => buf[start + @min(2, buf.len - start) .. buf.len - @intFromBool(buf.len - start > 1)], + else => buf[start + @min(2, buf.len - start) .. buf.len], }; return if (char == '>') @@ -288,7 +309,7 @@ pub fn LineTokenizer(comptime Buffer: type) type { .{ .space_string = slice }; }, '[' => { - if (buf.len < 2 or buf[buf.len - 1] != ']') { + if (buf.len - start < 2 or buf[buf.len - 1] != ']') { self.buffer.diag().line_offset = 0; self.buffer.diag().length = 1; self.buffer.diag().message = "this line contains a flow-style list but does not end with the closing character ']'"; @@ -296,10 +317,10 @@ pub fn LineTokenizer(comptime Buffer: type) type { } // keep the closing ] for the flow parser - return .{ .flow_list = buf[1..] }; + return .{ .flow_list = buf[start + 1 ..] }; }, '{' => { - if (buf.len < 2 or buf[buf.len - 1] != '}') { + if (buf.len - start < 2 or buf[buf.len - 1] != '}') { self.buffer.diag().line_offset = 0; self.buffer.diag().length = 1; self.buffer.diag().message = "this line contains a flow-style map but does not end with the closing character '}'"; @@ -307,7 +328,7 @@ pub fn LineTokenizer(comptime Buffer: type) type { } // keep the closing } fpr the flow parser - return .{ .flow_map = buf[1..] }; + return .{ .flow_map = buf[start + 1 ..] }; }, else => { if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t') { @@ -317,7 +338,7 @@ pub fn LineTokenizer(comptime Buffer: type) type { return error.TrailingWhitespace; } - return .{ .scalar = buf }; + return .{ .scalar = buf[start..] }; }, } }