parser: parse into 0-terminated strings

This was kind of an annoying change to make, since Zig 0.11.0 has issues
where it points to the wrong source location on compile errors in generic
code (which this 100% is); fortunately, that is fixed in master. The motivation
for this change is that the arg vector already contains 0-terminated
strings, so we can avoid a lot of copies. This makes forwarding
command-line arguments to C-functions that expect zero-terminated
strings much more straightforward, and they also automatically decay
to normal slices.

Unfortunately, environment variable values are NOT zero-terminated, so
they are currently copied with zero-termination. This seems to be the
fault of Windows/WASI, both of which are already performing
allocations (Windows to convert from UTF-16 to UTF-8, and WASI to get
a copy of the environment). By duplicating the std EnvMap
implementation, we could make a version that generates 0-terminated
env vars without extra copies, but I'll skip doing that for now.
This commit is contained in:
torque 2023-08-27 13:53:14 -07:00
parent 0695743a1f
commit 390a1ba4fd
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
6 changed files with 62 additions and 17 deletions

View File

@ -30,6 +30,13 @@ const cli = cmd: {
.description = "enum choice option", .description = "enum choice option",
.nice_type_name = "choice", .nice_type_name = "choice",
}); });
cmd.stringOption(.{
.name = "string",
.short_tag = "-s",
.long_tag = "--string",
.env_var = "NOCLIP_STRING",
.description = "A string value option",
});
cmd.addOption(.{ .OutputType = u32 }, .{ cmd.addOption(.{ .OutputType = u32 }, .{
.name = "default", .name = "default",
.short_tag = "-d", .short_tag = "-d",
@ -80,8 +87,8 @@ const subcommand = cmd: {
.falsy = .{ .long_tag = "--no-flag" }, .falsy = .{ .long_tag = "--no-flag" },
.env_var = "NOCLIP_SUBFLAG", .env_var = "NOCLIP_SUBFLAG",
}); });
cmd.addArgument(.{ .OutputType = []const u8 }, .{ .name = "argument" }); cmd.stringArgument(.{ .name = "argument" });
cmd.addArgument(.{ .OutputType = []const u8 }, .{ cmd.stringArgument(.{
.name = "arg", .name = "arg",
.description = "This is an argument that doesn't really do anything, but it's very important.", .description = "This is an argument that doesn't really do anything, but it's very important.",
}); });
@ -95,6 +102,7 @@ fn subHandler(context: []const u8, result: subcommand.Output()) !void {
fn cliHandler(context: *u32, result: cli.Output()) !void { fn cliHandler(context: *u32, result: cli.Output()) !void {
std.debug.print("context: {d}\n", .{context.*}); std.debug.print("context: {d}\n", .{context.*});
std.debug.print("callback is working {s}\n", .{result.string orelse "null"});
std.debug.print("callback is working {any}\n", .{result.choice}); std.debug.print("callback is working {any}\n", .{result.choice});
std.debug.print("callback is working {d}\n", .{result.default}); std.debug.print("callback is working {d}\n", .{result.default});
context.* += 1; context.* += 1;

View File

@ -133,7 +133,7 @@ pub fn CommandBuilder(comptime UserContext: type) type {
self.help_flag = tags; self.help_flag = tags;
} }
const string_generics = BuilderGenerics(UserContext){ .OutputType = []const u8 }; const string_generics = BuilderGenerics(UserContext){ .OutputType = [:0]const u8 };
pub fn stringOption( pub fn stringOption(
comptime self: *@This(), comptime self: *@This(),

View File

@ -61,7 +61,7 @@ fn MultiConverter(comptime gen: ParameterGenerics) ?ConverterSignature(gen) {
fn FlagConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn FlagConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
return struct { return struct {
pub fn handler(_: gen.UserContext, input: []const u8, _: ErrorWriter) ConversionError!bool { pub fn handler(_: gen.UserContext, input: [:0]const u8, _: ErrorWriter) ConversionError!bool {
// treat an empty string as falsy // treat an empty string as falsy
if (input.len == 0) return false; if (input.len == 0) return false;
@ -81,7 +81,7 @@ fn FlagConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
fn StringConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn StringConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
return struct { return struct {
pub fn handler(_: gen.UserContext, input: []const u8, _: ErrorWriter) ConversionError![]const u8 { pub fn handler(_: gen.UserContext, input: [:0]const u8, _: ErrorWriter) ConversionError![:0]const u8 {
return input; return input;
} }
}.handler; }.handler;
@ -91,7 +91,7 @@ fn IntConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
const IntType = gen.OutputType; const IntType = gen.OutputType;
return struct { return struct {
pub fn handler(_: gen.UserContext, input: []const u8, failure: ErrorWriter) ConversionError!IntType { pub fn handler(_: gen.UserContext, input: [:0]const u8, failure: ErrorWriter) ConversionError!IntType {
return std.fmt.parseInt(IntType, input, 0) catch { return std.fmt.parseInt(IntType, input, 0) catch {
try failure.print("cannot interpret \"{s}\" as an integer", .{input}); try failure.print("cannot interpret \"{s}\" as an integer", .{input});
return ConversionError.ConversionFailed; return ConversionError.ConversionFailed;
@ -137,7 +137,7 @@ fn ChoiceConverter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
const EnumType = gen.OutputType; const EnumType = gen.OutputType;
return struct { return struct {
pub fn handler(_: gen.UserContext, input: []const u8, failure: ErrorWriter) ConversionError!EnumType { pub fn handler(_: gen.UserContext, input: [:0]const u8, failure: ErrorWriter) ConversionError!EnumType {
return std.meta.stringToEnum(gen.ConvertedType(), input) orelse { return std.meta.stringToEnum(gen.ConvertedType(), input) orelse {
try failure.print("\"{s}\" is not a valid choice", .{input}); try failure.print("\"{s}\" is not a valid choice", .{input});
return ConversionError.ConversionFailed; return ConversionError.ConversionFailed;

View File

@ -174,6 +174,39 @@ pub fn SliceIterator(comptime T: type) type {
}; };
} }
/// Builds an iterator type that splits a mutable, 0-terminated buffer of `T`
/// on a single delimiter value. The split happens in place: every delimiter
/// that is found is overwritten with 0, so each returned field is itself a
/// 0-terminated slice pointing into the original buffer (no copies are made).
pub fn MutatingZSplitter(comptime T: type) type {
    return struct {
        /// The buffer being split. Delimiters inside it are overwritten with 0.
        buffer: [:0]T,
        /// The value that separates fields.
        delimiter: T,
        /// Offset of the next field, or null once the last field was returned.
        index: ?usize = 0,

        const Self = @This();

        /// Returns a slice of the next field, or null if splitting is complete.
        pub fn next(self: *Self) ?[:0]T {
            const field_start = self.index orelse return null;

            const found = std.mem.indexOfScalarPos(T, self.buffer, field_start, self.delimiter);
            if (found) |delimiter_pos| {
                // Terminate this field in place and continue after the delimiter.
                self.buffer[delimiter_pos] = 0;
                self.index = delimiter_pos + 1;
                return self.buffer[field_start..delimiter_pos :0];
            }

            // No delimiter left: the final field runs to the end of the buffer.
            self.index = null;
            return self.buffer[field_start..self.buffer.len :0];
        }

        /// Returns a slice of the remaining bytes. Does not affect iterator state.
        pub fn rest(self: Self) [:0]T {
            const buffer_end = self.buffer.len;
            const remaining_start = self.index orelse buffer_end;
            return self.buffer[remaining_start..buffer_end :0];
        }
    };
}
pub fn copyStruct(comptime T: type, source: T, field_overrides: anytype) T { pub fn copyStruct(comptime T: type, source: T, field_overrides: anytype) T {
var result: T = undefined; var result: T = undefined;

View File

@ -18,7 +18,7 @@ pub const FlagBias = enum {
truthy, truthy,
unbiased, unbiased,
pub fn string(comptime self: @This()) []const u8 { pub fn string(comptime self: @This()) [:0]const u8 {
return switch (comptime self) { return switch (comptime self) {
.truthy => "true", .truthy => "true",
.falsy => "false", .falsy => "false",
@ -110,12 +110,12 @@ pub const ParameterGenerics = struct {
pub fn IntermediateValue(comptime self: @This()) type { pub fn IntermediateValue(comptime self: @This()) type {
return comptime switch (self.value_count) { return comptime switch (self.value_count) {
.flag => []const u8, .flag => [:0]const u8,
.count => usize, .count => usize,
.fixed => |count| switch (count) { .fixed => |count| switch (count) {
0 => @compileError("bad fixed-zero parameter"), 0 => @compileError("bad fixed-zero parameter"),
1 => []const u8, 1 => [:0]const u8,
else => std.ArrayList([]const u8), else => std.ArrayList([:0]const u8),
}, },
}; };
} }

View File

@ -256,7 +256,7 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
fn parseLongTag( fn parseLongTag(
self: *@This(), self: *@This(),
name: []const u8, name: []const u8,
arg: []const u8, arg: [:0]u8,
argit: *ncmeta.SliceIterator([][:0]u8), argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void { ) ParseError!void {
if (comptime command.help_flag.long_tag) |long| if (comptime command.help_flag.long_tag) |long|
@ -315,7 +315,7 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
fn parseOrdinals( fn parseOrdinals(
self: *@This(), self: *@This(),
arg: []const u8, arg: [:0]u8,
argit: *ncmeta.SliceIterator([][:0]u8), argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!?ParserInterface { ) ParseError!?ParserInterface {
comptime var arg_index: u32 = 0; comptime var arg_index: u32 = 0;
@ -373,12 +373,13 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
0 => return ParseError.ExtraValue, 0 => return ParseError.ExtraValue,
1 => try self.pushIntermediateValue(param, argit.next() orelse return ParseError.MissingValue), 1 => try self.pushIntermediateValue(param, argit.next() orelse return ParseError.MissingValue),
else => |total| { else => |total| {
var list = std.ArrayList([]const u8).initCapacity(self.allocator, total) catch var list = std.ArrayList([:0]const u8).initCapacity(self.allocator, total) catch
return ParseError.UnexpectedFailure; return ParseError.UnexpectedFailure;
var consumed: u32 = 0; var consumed: u32 = 0;
while (consumed < total) : (consumed += 1) { while (consumed < total) : (consumed += 1) {
const next = argit.next() orelse return ParseError.MissingValue; const next = argit.next() orelse return ParseError.MissingValue;
list.append(next) catch return ParseError.UnexpectedFailure; list.append(next) catch return ParseError.UnexpectedFailure;
} }
if (bounded and argit.next() != null) return ParseError.ExtraValue; if (bounded and argit.next() != null) return ParseError.ExtraValue;
@ -392,9 +393,9 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
fn applyFusedValues( fn applyFusedValues(
self: *@This(), self: *@This(),
comptime param: anytype, comptime param: anytype,
value: []const u8, value: [:0]u8,
) ParseError!void { ) ParseError!void {
var iter = std.mem.split(u8, value, ","); var iter = ncmeta.MutatingZSplitter(u8){ .buffer = value, .delimiter = ',' };
return try self.applyParamValues(param, &iter, true); return try self.applyParamValues(param, &iter, true);
} }
@ -402,7 +403,10 @@ pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
inline for (comptime parameters) |param| { inline for (comptime parameters) |param| {
if (comptime param.env_var) |env_var| blk: { if (comptime param.env_var) |env_var| blk: {
if (@field(self.intermediate, param.name) != null) break :blk; if (@field(self.intermediate, param.name) != null) break :blk;
const val = env.get(env_var) orelse break :blk;
const val = self.allocator.dupeZ(u8, env.get(env_var) orelse break :blk) catch
return ParseError.UnexpectedFailure;
if (comptime @TypeOf(param).G.value_count == .flag) { if (comptime @TypeOf(param).G.value_count == .flag) {
try self.pushIntermediateValue(param, val); try self.pushIntermediateValue(param, val);
} else { } else {