diff --git a/src/parser/state.zig b/src/parser/state.zig index 29afa0e..8e9e3f5 100644 --- a/src/parser/state.zig +++ b/src/parser/state.zig @@ -26,6 +26,7 @@ pub const State = struct { document: Document, diagnostics: *Diagnostics, value_stack: Stack, + string_builder: std.ArrayListUnmanaged(u8), mode: enum { initial, value, done } = .initial, expect_shift: tokenizer.ShiftDirection = .none, dangling_key: ?[]const u8 = null, @@ -35,6 +36,7 @@ pub const State = struct { .document = Document.init(allocator), .diagnostics = diagnostics, .value_stack = Stack.init(allocator), + .string_builder = std.ArrayListUnmanaged(u8){}, }; } @@ -47,7 +49,7 @@ pub const State = struct { switch (state.mode) { .initial => switch (options.default_object) { - .string => state.document.root = Value.newString(arena_alloc), + .string => state.document.root = Value.emptyString(), .list => state.document.root = Value.newList(arena_alloc), .map => state.document.root = Value.newMap(arena_alloc), .fail => { @@ -58,14 +60,14 @@ pub const State = struct { }, .value => switch (state.value_stack.getLast().*) { // remove the final trailing newline or space - .string => |*string| _ = string.popOrNull(), - // if we have a dangling -, attach an empty string to it - .list => |*list| if (state.expect_shift == .indent) try list.append(Value.newScalar(arena_alloc)), - // if we have a dangling "key:", attach an empty string to it + .string => |*string| string.* = try state.string_builder.toOwnedSlice(arena_alloc), + // if we have a dangling -, attach an empty scalar to it + .list => |*list| if (state.expect_shift == .indent) try list.append(Value.emptyScalar()), + // if we have a dangling "key:", attach an empty scalar to it .map => |*map| if (state.dangling_key) |dk| try state.putMap( map, dk, - Value.newScalar(arena_alloc), + Value.emptyScalar(), options.duplicate_key_behavior, ), .scalar, .flow_list, .flow_map => {}, @@ -102,9 +104,9 @@ pub const State = struct { state.document.root = try Value.fromScalar(arena_alloc, str); state.mode = .done; }, - .line_string, .space_string => |str| { - state.document.root = try Value.fromString(arena_alloc, str); - try state.document.root.string.append(in_line.lineEnding()); + .line_string, .concat_string => |str| { + state.document.root = Value.emptyString(); + try state.string_builder.appendSlice(arena_alloc, str); try state.value_stack.append(&state.document.root); state.mode = .value; }, @@ -126,7 +128,7 @@ pub const State = struct { switch (value) { .empty => state.expect_shift = .indent, .scalar => |str| try rootlist.append(try Value.fromScalar(arena_alloc, str)), - .line_string, .space_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)), + .line_string, .concat_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)), .flow_list => |str| try rootlist.append(try state.parseFlow(str, .flow_list, dkb)), .flow_map => |str| try rootlist.append(try state.parseFlow(str, .flow_map, dkb)), } @@ -144,7 +146,7 @@ pub const State = struct { state.dangling_key = dupekey; }, .scalar => |str| try rootmap.put(dupekey, try Value.fromScalar(arena_alloc, str)), - .line_string, .space_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)), + .line_string, .concat_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)), .flow_list => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_list, dkb)), .flow_map => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_map, dkb)), } @@ -163,13 +165,13 @@ pub const State = struct { .string => |*string| { if (line.shift == .indent) { state.diagnostics.length = 1; - state.diagnostics.message = "the document contains an invalid indented line in a multiline string"; + state.diagnostics.message = "the document contains invalid indentation in a multiline string"; return error.UnexpectedIndent; } if (firstpass and line.shift == .dedent) { - // kick off the last trailing space or newline - _ = string.pop(); + // copy the string into the document proper + string.* = try state.string_builder.toOwnedSlice(arena_alloc); var dedent_depth = line.shift.dedent; while (dedent_depth > 0) : (dedent_depth -= 1) @@ -182,9 +184,10 @@ pub const State = struct { .comment => unreachable, .in_line => |in_line| switch (in_line) { .empty => unreachable, - .line_string, .space_string => |str| { - try string.appendSlice(str); - try string.append(in_line.lineEnding()); + inline .line_string, .concat_string => |str, tag| { + if (tag == .line_string) + try state.string_builder.append(arena_alloc, '\n'); + try state.string_builder.appendSlice(arena_alloc, str); }, else => { state.diagnostics.length = 1; @@ -208,7 +211,7 @@ pub const State = struct { // the first line here creates the state.expect_shift, but the second line // is a valid continuation of the list despite not being indented if (firstpass and (state.expect_shift == .indent and line.shift != .indent)) - try list.append(Value.newScalar(arena_alloc)); + try list.append(Value.emptyScalar()); // Consider: // @@ -245,9 +248,9 @@ pub const State = struct { .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)), .flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)), .flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)), - .line_string, .space_string => |str| { - const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str)); - try new_string.string.append(in_line.lineEnding()); + .line_string, .concat_string => |str| { + const new_string = try appendListGetValue(list, Value.emptyString()); + try state.string_builder.appendSlice(arena_alloc, str); try state.value_stack.append(new_string); state.expect_shift = .none; }, @@ -259,7 +262,7 @@ pub const State = struct { switch (value) { .empty => state.expect_shift = .indent, .scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)), - .line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)), + .line_string, .concat_string => |str| try list.append(try Value.fromString(arena_alloc, str)), .flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)), .flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)), } @@ -311,7 +314,7 @@ pub const State = struct { state.diagnostics.message = "the document is somehow missing a key (this shouldn't be possible)"; return error.Fail; }, - Value.newScalar(arena_alloc), + Value.emptyScalar(), dkb, ); state.dangling_key = null; @@ -346,10 +349,10 @@ pub const State = struct { .flow_map => |str| { try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .flow_map, dkb), dkb); }, - .line_string, .space_string => |str| { + .line_string, .concat_string => |str| { // string pushes the stack - const new_string = try state.putMapGetValue(map, state.dangling_key.?, try Value.fromString(arena_alloc, str), dkb); - try new_string.string.append(in_line.lineEnding()); + const new_string = try state.putMapGetValue(map, state.dangling_key.?, Value.emptyString(), dkb); + try state.string_builder.appendSlice(arena_alloc, str); try state.value_stack.append(new_string); state.expect_shift = .none; }, @@ -388,7 +391,7 @@ pub const State = struct { state.dangling_key = dupekey; }, .scalar => |str| try state.putMap(map, dupekey, try Value.fromScalar(arena_alloc, str), dkb), - .line_string, .space_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb), + .line_string, .concat_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb), .flow_list => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_list, dkb), dkb), .flow_map => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_map, dkb), dkb), } @@ -457,7 +460,7 @@ pub const State = struct { ',' => { // empty value const tip = try state.getStackTip(); - try tip.flow_list.append(Value.newScalar(arena_alloc)); + try tip.flow_list.append(Value.emptyScalar()); item_start = idx + 1; }, '{' => { @@ -491,7 +494,7 @@ pub const State = struct { return error.BadState; }; if (finished.flow_list.items.len > 0 or idx > item_start) - try finished.flow_list.append(Value.newScalar(arena_alloc)); + try finished.flow_list.append(Value.emptyScalar()); pstate = try state.popFlowStack(); }, else => { @@ -599,7 +602,7 @@ pub const State = struct { try state.putMap( &tip.flow_map, dangling_key.?, - Value.newScalar(arena_alloc), + Value.emptyScalar(), dkb, ); @@ -641,7 +644,7 @@ pub const State = struct { try state.putMap( &tip.flow_map, dangling_key.?, - Value.newScalar(arena_alloc), + Value.emptyScalar(), dkb, ); diff --git a/src/parser/value.zig b/src/parser/value.zig index d312c69..4e9bf36 100644 --- a/src/parser/value.zig +++ b/src/parser/value.zig @@ -41,7 +41,7 @@ pub fn Parsed(comptime T: type) type { } pub const Value = union(enum) { - pub const String = std.ArrayList(u8); + pub const String = []const u8; pub const Map = std.StringArrayHashMap(Value); pub const List = std.ArrayList(Value); pub const TagType = @typeInfo(Value).Union.tag_type.?; @@ -57,8 +57,8 @@ pub const Value = union(enum) { switch (@typeInfo(T)) { .Void => { switch (self) { - .scalar => |str| return if (str.items.len == 0) void{} else error.BadValue, - .string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue, + .scalar => |str| return if (str.len == 0) void{} else error.BadValue, + .string => |str| return if (options.coerce_strings and str.len == 0) void{} else error.BadValue, else => return error.BadValue, } }, @@ -67,9 +67,9 @@ pub const Value = union(enum) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; for (options.boolean_strings.truthy) |check| - if (std.mem.eql(u8, str.items, check)) return true; + if (std.mem.eql(u8, str, check)) return true; for (options.boolean_strings.falsy) |check| - if (std.mem.eql(u8, str.items, check)) return false; + if (std.mem.eql(u8, str, check)) return false; return error.BadValue; }, @@ -80,8 +80,7 @@ pub const Value = union(enum) { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; - std.debug.print("'{s}'\n", .{str.items}); - return try std.fmt.parseInt(T, str.items, 0); + return try std.fmt.parseInt(T, str, 0); }, else => return error.BadValue, } @@ -90,7 +89,7 @@ pub const Value = union(enum) { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; - return try std.fmt.parseFloat(T, str.items, 0); + return try std.fmt.parseFloat(T, str, 0); }, else => return error.BadValue, } @@ -104,7 +103,7 @@ pub const Value = union(enum) { // probably be solved in the zig stdlib or similar. // TODO: This also doesn't handle sentinels properly. switch (self) { - .scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue, + .scalar, .string => |str| return if (ptr.child == u8) str else error.BadValue, .list, .flow_list => |lst| { var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len); errdefer result.deinit(); @@ -133,9 +132,9 @@ pub const Value = union(enum) { // TODO: This also doesn't handle sentinels properly. switch (self) { .scalar, .string => |str| { - if (arr.child == u8 and str.items.len == arr.len) { + if (arr.child == u8 and str.len == arr.len) { var result: T = undefined; - @memcpy(&result, str.items); + @memcpy(&result, str); return result; } else return error.BadValue; }, @@ -182,7 +181,6 @@ pub const Value = union(enum) { } else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) { @field(result, field.name) = null; } else { - std.debug.print("{s}\n", .{field.name}); return error.BadValue; } } @@ -216,9 +214,9 @@ pub const Value = union(enum) { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; - if (std.meta.stringToEnum(T, str.items)) |value| return value; + if (std.meta.stringToEnum(T, str)) |value| return value; if (options.allow_numeric_enums) { - const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch + const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str, 10) catch return error.BadValue; return std.meta.intToEnum(T, parsed) catch error.BadValue; } @@ -255,7 +253,7 @@ pub const Value = union(enum) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; for (options.null_strings) |check| - if (std.mem.eql(u8, str.items, check)) return null; + if (std.mem.eql(u8, str, check)) return null; return try self.convertTo(opt.child, allocator, options); }, @@ -275,17 +273,15 @@ pub const Value = union(enum) { } inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value { - var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len)); - @field(res, @tagName(classification)).appendSliceAssumeCapacity(input); - return res; + return @unionInit(Value, @tagName(classification), try alloc.dupe(u8, input)); } - pub inline fn newScalar(alloc: std.mem.Allocator) Value { - return .{ .scalar = String.init(alloc) }; + pub inline fn emptyScalar() Value { + return .{ .scalar = "" }; } - pub inline fn newString(alloc: std.mem.Allocator) Value { - return .{ .string = String.init(alloc) }; + pub inline fn emptyString() Value { + return .{ .string = "" }; } pub inline fn newList(alloc: std.mem.Allocator) Value { @@ -307,7 +303,7 @@ pub const Value = union(enum) { pub fn recursiveEqualsExact(self: Value, other: Value) bool { if (@as(TagType, self) != other) return false; switch (self) { - inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items), + inline .scalar, .string => |str, tag| return std.mem.eql(u8, str, @field(other, @tagName(tag))), inline .list, .flow_list => |lst, tag| { const olst = @field(other, @tagName(tag)); @@ -341,8 +337,8 @@ pub const Value = union(enum) { fn printRecursive(self: Value, indent: usize) void { switch (self) { .scalar, .string => |str| { - if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| { - var lines = std.mem.splitScalar(u8, str.items, '\n'); + if (std.mem.indexOfScalar(u8, str, '\n')) |_| { + var lines = std.mem.splitScalar(u8, str, '\n'); std.debug.print("\n", .{}); while (lines.next()) |line| { std.debug.print( @@ -356,7 +352,7 @@ pub const Value = union(enum) { ); } } else { - std.debug.print("{s}", .{str.items}); + std.debug.print("{s}", .{str}); } }, .list, .flow_list => |list| { diff --git a/src/tokenizer.zig b/src/tokenizer.zig index 819603f..acab761 100644 --- a/src/tokenizer.zig +++ b/src/tokenizer.zig @@ -23,18 +23,10 @@ pub const InlineItem = union(enum) { empty: void, scalar: []const u8, line_string: []const u8, - space_string: []const u8, + concat_string: []const u8, flow_list: []const u8, flow_map: []const u8, - - pub fn lineEnding(self: InlineItem) u8 { - return switch (self) { - .line_string => '\n', - .space_string => ' ', - else => unreachable, - }; - } }; pub const LineContents = union(enum) { @@ -306,7 +298,7 @@ pub fn LineTokenizer(comptime Buffer: type) type { return if (char == '>') .{ .line_string = slice } else - .{ .space_string = slice }; + .{ .concat_string = slice }; }, '[' => { if (buf.len - start < 2 or buf[buf.len - 1] != ']') {