From 390a1ba4fda46b6ddaabe81d79497db7535bb454 Mon Sep 17 00:00:00 2001 From: torque Date: Sun, 27 Aug 2023 13:53:14 -0700 Subject: [PATCH] parser: parse into 0-terminated strings This was kind of an annoying change to make, since 0.11.0 has an issue where it points to the wrong source location on compile errors in generic code (which this 100% is); fortunately, that is fixed in master. The motivation for this change is that the arg vector already contains 0-terminated strings, so we can avoid a lot of copies. This makes forwarding command-line arguments to C functions that expect zero-terminated strings much more straightforward, and the parsed strings also automatically coerce to normal slices. Unfortunately, environment variable values are NOT zero-terminated, so they are currently copied with zero-termination. This seems to be the fault of Windows/WASI, both of which already perform allocations (Windows to convert from UTF-16 to UTF-8, and WASI to get a copy of the environment). By duplicating the std EnvMap implementation, we could make a version that generates 0-terminated env vars without extra copies, but I'll skip doing that for now. 
--- demo/demo.zig | 12 ++++++++++-- source/command.zig | 2 +- source/converters.zig | 8 ++++---- source/meta.zig | 33 +++++++++++++++++++++++++++++++++ source/parameters.zig | 8 ++++---- source/parser.zig | 16 ++++++++++------ 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/demo/demo.zig b/demo/demo.zig index a8db216..318283f 100644 --- a/demo/demo.zig +++ b/demo/demo.zig @@ -30,6 +30,13 @@ const cli = cmd: { .description = "enum choice option", .nice_type_name = "choice", }); + cmd.stringOption(.{ + .name = "string", + .short_tag = "-s", + .long_tag = "--string", + .env_var = "NOCLIP_STRING", + .description = "A string value option", + }); cmd.addOption(.{ .OutputType = u32 }, .{ .name = "default", .short_tag = "-d", @@ -80,8 +87,8 @@ const subcommand = cmd: { .falsy = .{ .long_tag = "--no-flag" }, .env_var = "NOCLIP_SUBFLAG", }); - cmd.addArgument(.{ .OutputType = []const u8 }, .{ .name = "argument" }); - cmd.addArgument(.{ .OutputType = []const u8 }, .{ + cmd.stringArgument(.{ .name = "argument" }); + cmd.stringArgument(.{ .name = "arg", .description = "This is an argument that doesn't really do anything, but it's very important.", }); @@ -95,6 +102,7 @@ fn subHandler(context: []const u8, result: subcommand.Output()) !void { fn cliHandler(context: *u32, result: cli.Output()) !void { std.debug.print("context: {d}\n", .{context.*}); + std.debug.print("callback is working {s}\n", .{result.string orelse "null"}); std.debug.print("callback is working {any}\n", .{result.choice}); std.debug.print("callback is working {d}\n", .{result.default}); context.* += 1; diff --git a/source/command.zig b/source/command.zig index b0a78d5..f8780f3 100644 --- a/source/command.zig +++ b/source/command.zig @@ -133,7 +133,7 @@ pub fn CommandBuilder(comptime UserContext: type) type { self.help_flag = tags; } - const string_generics = BuilderGenerics(UserContext){ .OutputType = []const u8 }; + const string_generics = BuilderGenerics(UserContext){ .OutputType = [:0]const 
u8 }; pub fn stringOption( comptime self: *@This(), diff --git a/source/converters.zig b/source/converters.zig index 80cb392..b427ba4 100644 --- a/source/converters.zig +++ b/source/converters.zig @@ -61,7 +61,7 @@ fn MultiConverter(comptime gen: ParameterGenerics) ?ConverterSignature(gen) { fn FlagConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { return struct { - pub fn handler(_: gen.UserContext, input: []const u8, _: ErrorWriter) ConversionError!bool { + pub fn handler(_: gen.UserContext, input: [:0]const u8, _: ErrorWriter) ConversionError!bool { // treat an empty string as falsy if (input.len == 0) return false; @@ -81,7 +81,7 @@ fn FlagConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn StringConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { return struct { - pub fn handler(_: gen.UserContext, input: []const u8, _: ErrorWriter) ConversionError![]const u8 { + pub fn handler(_: gen.UserContext, input: [:0]const u8, _: ErrorWriter) ConversionError![:0]const u8 { return input; } }.handler; @@ -91,7 +91,7 @@ fn IntConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { const IntType = gen.OutputType; return struct { - pub fn handler(_: gen.UserContext, input: []const u8, failure: ErrorWriter) ConversionError!IntType { + pub fn handler(_: gen.UserContext, input: [:0]const u8, failure: ErrorWriter) ConversionError!IntType { return std.fmt.parseInt(IntType, input, 0) catch { try failure.print("cannot interpret \"{s}\" as an integer", .{input}); return ConversionError.ConversionFailed; @@ -137,7 +137,7 @@ fn ChoiceConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { const EnumType = gen.OutputType; return struct { - pub fn handler(_: gen.UserContext, input: []const u8, failure: ErrorWriter) ConversionError!EnumType { + pub fn handler(_: gen.UserContext, input: [:0]const u8, failure: ErrorWriter) ConversionError!EnumType { return std.meta.stringToEnum(gen.ConvertedType(), input) 
orelse { try failure.print("\"{s}\" is not a valid choice", .{input}); return ConversionError.ConversionFailed; diff --git a/source/meta.zig b/source/meta.zig index 733ddba..a60d34b 100644 --- a/source/meta.zig +++ b/source/meta.zig @@ -174,6 +174,39 @@ pub fn SliceIterator(comptime T: type) type { }; } +pub fn MutatingZSplitter(comptime T: type) type { + return struct { + buffer: [:0]T, + delimiter: T, + index: ?usize = 0, + + const Self = @This(); + + /// Returns a slice of the next field, or null if splitting is complete. + pub fn next(self: *Self) ?[:0]T { + const start = self.index orelse return null; + + const end = if (std.mem.indexOfScalarPos(T, self.buffer, start, self.delimiter)) |delim_idx| blk: { + self.buffer[delim_idx] = 0; + self.index = delim_idx + 1; + break :blk delim_idx; + } else blk: { + self.index = null; + break :blk self.buffer.len; + }; + + return self.buffer[start..end :0]; + } + + /// Returns a slice of the remaining bytes. Does not affect iterator state. + pub fn rest(self: Self) [:0]T { + const end = self.buffer.len; + const start = self.index orelse end; + return self.buffer[start..end :0]; + } + }; +} + pub fn copyStruct(comptime T: type, source: T, field_overrides: anytype) T { var result: T = undefined; diff --git a/source/parameters.zig b/source/parameters.zig index 0c314d4..085fc96 100644 --- a/source/parameters.zig +++ b/source/parameters.zig @@ -18,7 +18,7 @@ pub const FlagBias = enum { truthy, unbiased, - pub fn string(comptime self: @This()) []const u8 { + pub fn string(comptime self: @This()) [:0]const u8 { return switch (comptime self) { .truthy => "true", .falsy => "false", @@ -110,12 +110,12 @@ pub const ParameterGenerics = struct { pub fn IntermediateValue(comptime self: @This()) type { return comptime switch (self.value_count) { - .flag => []const u8, + .flag => [:0]const u8, .count => usize, .fixed => |count| switch (count) { 0 => @compileError("bad fixed-zero parameter"), - 1 => []const u8, - else => 
std.ArrayList([]const u8), + 1 => [:0]const u8, + else => std.ArrayList([:0]const u8), }, }; } diff --git a/source/parser.zig b/source/parser.zig index cb6385e..29802ff 100644 --- a/source/parser.zig +++ b/source/parser.zig @@ -256,7 +256,7 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type { fn parseLongTag( self: *@This(), name: []const u8, - arg: []const u8, + arg: [:0]u8, argit: *ncmeta.SliceIterator([][:0]u8), ) ParseError!void { if (comptime command.help_flag.long_tag) |long| @@ -315,7 +315,7 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type { fn parseOrdinals( self: *@This(), - arg: []const u8, + arg: [:0]u8, argit: *ncmeta.SliceIterator([][:0]u8), ) ParseError!?ParserInterface { comptime var arg_index: u32 = 0; @@ -373,12 +373,13 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type { 0 => return ParseError.ExtraValue, 1 => try self.pushIntermediateValue(param, argit.next() orelse return ParseError.MissingValue), else => |total| { - var list = std.ArrayList([]const u8).initCapacity(self.allocator, total) catch + var list = std.ArrayList([:0]const u8).initCapacity(self.allocator, total) catch return ParseError.UnexpectedFailure; var consumed: u32 = 0; while (consumed < total) : (consumed += 1) { const next = argit.next() orelse return ParseError.MissingValue; + list.append(next) catch return ParseError.UnexpectedFailure; } if (bounded and argit.next() != null) return ParseError.ExtraValue; @@ -392,9 +393,9 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type { fn applyFusedValues( self: *@This(), comptime param: anytype, - value: []const u8, + value: [:0]u8, ) ParseError!void { - var iter = std.mem.split(u8, value, ","); + var iter = ncmeta.MutatingZSplitter(u8){ .buffer = value, .delimiter = ',' }; return try self.applyParamValues(param, &iter, true); } @@ -402,7 +403,10 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type { inline for 
(comptime parameters) |param| { if (comptime param.env_var) |env_var| blk: { if (@field(self.intermediate, param.name) != null) break :blk; - const val = env.get(env_var) orelse break :blk; + + const val = self.allocator.dupeZ(u8, env.get(env_var) orelse break :blk) catch + return ParseError.UnexpectedFailure; + if (comptime @TypeOf(param).G.value_count == .flag) { try self.pushIntermediateValue(param, val); } else {