From 33ab092a060cfaee2b9d536a8b6d24b7f312421f Mon Sep 17 00:00:00 2001 From: torque Date: Thu, 23 Nov 2023 17:52:16 -0800 Subject: [PATCH] value: store strings/scalars as null-terminated Since these were already always copied from the source data, this was a very easy change to implement. This makes our output schema string detection a bit stricter, and saves performing a copy in the case that the output string needs to be 0 terminated. Unfortunately, we can't skip copies in the general slice case since each child element needs to get converted to the appropriate type. --- src/parser/state.zig | 4 ++-- src/parser/value.zig | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/parser/state.zig b/src/parser/state.zig index c1ed25b..b0c3c99 100644 --- a/src/parser/state.zig +++ b/src/parser/state.zig @@ -70,7 +70,7 @@ pub const State = struct { }, .value => switch (state.value_stack.getLast().*) { // we have an in-progress string, finish it. - .string => |*string| string.* = try state.string_builder.toOwnedSlice(arena_alloc), + .string => |*string| string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0), // if we have a dangling -, attach an empty scalar to it .list => |*list| if (state.expect_shift == .indent) try list.append(Value.emptyScalar()), // if we have a dangling "key:", attach an empty scalar to it @@ -185,7 +185,7 @@ pub const State = struct { if (firstpass and line.shift == .dedent) { // copy the string into the document proper - string.* = try state.string_builder.toOwnedSlice(arena_alloc); + string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0); var dedent_depth = line.shift.dedent; while (dedent_depth > 0) : (dedent_depth -= 1) diff --git a/src/parser/value.zig b/src/parser/value.zig index 1503a8e..1038d01 100644 --- a/src/parser/value.zig +++ b/src/parser/value.zig @@ -51,7 +51,7 @@ pub fn Parsed(comptime T: type) type { } pub const Value = union(enum) { - pub const String = []const u8; + pub const String = [:0]const u8; pub const Map = std.StringArrayHashMap(Value); pub const List = std.ArrayList(Value); pub const TagType = @typeInfo(Value).Union.tag_type.?; @@ -120,12 +120,11 @@ pub const Value = union(enum) { // probably be solved in the zig stdlib or similar. switch (self) { .scalar, .string => |str| { - if (ptr.child == u8) { - if (ptr.sentinel) |sent| { - var copy = try allocator.allocSentinel(u8, str.len, @as(*const u8, @ptrCast(sent)).*); - @memcpy(copy, str); - return copy; - } + if (comptime ptr.child == u8) { + if (comptime ptr.sentinel) |sentinel| + if (comptime @as(*align(1) const ptr.child, @ptrCast(sentinel)).* != 0) + return error.BadValue; + return str; } else { return error.BadValue; @@ -348,7 +347,7 @@ pub const Value = union(enum) { } inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value { - return @unionInit(Value, @tagName(classification), try alloc.dupe(u8, input)); + return @unionInit(Value, @tagName(classification), try alloc.dupeZ(u8, input)); } pub inline fn emptyScalar() Value {