parser: a lot of work on multiple-value options

Also begin work on a generic runtime parser interface for attaching
subcommands. This will allow subcommands to be stored in a mapping (or a
similar structure) at runtime, which will simplify their use.
This commit is contained in:
torque 2023-03-28 01:07:02 -07:00
parent e31e41d975
commit 0fbbf34156
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
3 changed files with 465 additions and 185 deletions

View File

@ -8,10 +8,7 @@ pub const ConversionError = error{
}; };
pub fn ConverterSignature(comptime gen: ParameterGenerics) type { pub fn ConverterSignature(comptime gen: ParameterGenerics) type {
return if (gen.no_context()) return *const fn (gen.ContextType, []const u8) ConversionError!gen.ResultType();
*const fn ([]const u8) ConversionError!gen.ResultType()
else
*const fn (gen.ContextType, []const u8) ConversionError!gen.ResultType();
} }
pub fn default_converter(comptime gen: ParameterGenerics) ?ConverterSignature(gen) { pub fn default_converter(comptime gen: ParameterGenerics) ?ConverterSignature(gen) {
@ -28,94 +25,50 @@ pub fn default_converter(comptime gen: ParameterGenerics) ?ConverterSignature(ge
} }
fn flag_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn flag_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
return if (gen.no_context()) return struct {
struct { pub fn handler(_: gen.ContextType, input: []const u8) ConversionError!bool {
pub fn handler(input: []const u8) ConversionError!bool { // treat an empty string as falsy
// treat an empty string as falsy if (input.len == 0) return false;
if (input.len == 0) return false;
if (input.len <= 5) { if (input.len <= 5) {
var lowerBuf: [5]u8 = undefined; var lowerBuf: [5]u8 = undefined;
const comp = std.ascii.lowerString(&lowerBuf, input); const comp = std.ascii.lowerString(&lowerBuf, input);
inline for ([_][]const u8{ "false", "no", "0" }) |candidate| { inline for ([_][]const u8{ "false", "no", "0" }) |candidate| {
if (std.mem.eql(u8, comp, candidate)) return false; if (std.mem.eql(u8, comp, candidate)) return false;
}
} }
return true;
} }
}.handler
else
struct {
pub fn handler(_: gen.ContextType, input: []const u8) ConversionError!bool {
// treat an empty string as falsy
if (input.len == 0) return false;
if (input.len <= 5) { return true;
var lowerBuf: [5]u8 = undefined; }
const comp = std.ascii.lowerString(&lowerBuf, input); }.handler;
inline for ([_][]const u8{ "false", "no", "0" }) |candidate| {
if (std.mem.eql(u8, comp, candidate)) return false;
}
}
return true;
}
}.handler;
} }
fn string_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn string_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
return if (gen.no_context()) return struct {
struct { pub fn handler(_: gen.ContextType, value: []const u8) ConversionError![]const u8 {
pub fn handler(value: []const u8) ConversionError![]const u8 { return value;
return value; }
} }.handler;
}.handler
else
struct {
pub fn handler(_: gen.ContextType, value: []const u8) ConversionError![]const u8 {
return value;
}
}.handler;
} }
fn int_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn int_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
const IntType = gen.ResultType(); const IntType = gen.ResultType();
comptime std.debug.assert(@typeInfo(IntType) == .Int);
std.debug.assert(switch (@typeInfo(IntType)) { return struct {
.Int => true, pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!IntType {
else => false, return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue;
}); }
}.handler;
return if (gen.no_context())
struct {
pub fn handler(value: []const u8) ConversionError!IntType {
return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue;
}
}.handler
else
struct {
pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!IntType {
return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue;
}
}.handler;
} }
fn choice_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { fn choice_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) {
const EnumType = gen.ResultType(); const EnumType = gen.ResultType();
return if (gen.no_context()) return struct {
struct { pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!EnumType {
pub fn handler(value: []const u8) ConversionError!EnumType { return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue;
return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue; }
} }.handler;
}.handler
else
struct {
pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!EnumType {
return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue;
}
}.handler;
} }

View File

@ -21,21 +21,35 @@ const Errors = error{
}; };
const ParseError = error{ const ParseError = error{
UnexpectedFailure,
EmptyArgs,
ValueMissing, ValueMissing,
UnexpectedValue,
FusedShortTagValueMissing, FusedShortTagValueMissing,
UnknownLongTagParameter, UnknownLongTagParameter,
UnknownShortTagParameter, UnknownShortTagParameter,
}; };
// Design note: a flexible value count followed by any number of fixed
// arguments would also be well-defined (`mv` is the classic example of that
// shape). Teaching the parser about it seems to add a lot of complexity for
// little gain, though; the `mv` case is much more easily handled by taking a
// greedy value and splitting it up in the value handler.

/// How many values a parameter consumes.
const ValueCount = union(enum) {
    /// exactly this many values
    fixed: u32,
    /// no fixed count; see the design note above for the intended use
    greedy,
};
const FlagBias = enum { const FlagBias = enum {
falsy, falsy,
truthy, truthy,
unbiased, unbiased,
pub fn string(self: @This()) []const u8 { pub fn string(comptime self: @This()) []const u8 {
return switch (self) { return switch (self) {
.truthy => "true", .truthy => "true",
else => @compileLog(self), .falsy => "false",
else => @compileError("flag tag with unbiased bias?"),
}; };
} }
}; };
@ -58,13 +72,6 @@ pub const ParameterGenerics = struct {
return self.result == .flag; return self.result == .flag;
} }
pub fn clone(comptime self: @This(), comptime NewResult: type) @This() {
return @This(){
.ContextType = self.ContextType,
.result = .{ .Value = NewResult },
};
}
pub fn ResultType(comptime self: @This()) type { pub fn ResultType(comptime self: @This()) type {
return switch (self.result) { return switch (self.result) {
.Value => |res| res, .Value => |res| res,
@ -108,11 +115,15 @@ fn OptionConfig(comptime generics: ParameterGenerics) type {
long_tag: ?[]const u8 = null, long_tag: ?[]const u8 = null,
env_var: ?[]const u8 = null, env_var: ?[]const u8 = null,
value_count: ValueCount = if (generics.is_flag()) .{ .fixed = 0 } else .{ .fixed = 1 },
default: ?generics.ResultType() = null, default: ?generics.ResultType() = null,
converter: ?ConverterSignature(generics) = null, converter: ?ConverterSignature(generics) = null,
arg_count: u32 = if (generics.is_flag()) 0 else 1, description: []const u8 = "", // description for output in help text
eager: bool = false, eager: bool = false,
required: bool = generics.param_type == .Ordinal, required: bool = generics.param_type == .Ordinal,
global: bool = false,
multi: bool = false,
exposed: bool = true, exposed: bool = true,
secret: bool = false, secret: bool = false,
nice_type_name: []const u8 = @typeName(generics.ResultType()), nice_type_name: []const u8 = @typeName(generics.ResultType()),
@ -121,37 +132,80 @@ fn OptionConfig(comptime generics: ParameterGenerics) type {
fn OptionType(comptime generics: ParameterGenerics) type { fn OptionType(comptime generics: ParameterGenerics) type {
return struct { return struct {
pub const gen = generics;
pub const param_type: ParameterType = generics.param_type; pub const param_type: ParameterType = generics.param_type;
pub const is_flag: bool = generics.is_flag(); pub const is_flag: bool = generics.is_flag();
pub const flag_bias: FlagBias = if (generics.is_flag()) generics.result.flag else .unbiased; pub const flag_bias: FlagBias = if (is_flag) generics.result.flag else .unbiased;
name: []const u8, name: []const u8,
short_tag: ?[]const u8, short_tag: ?[]const u8,
long_tag: ?[]const u8, long_tag: ?[]const u8,
env_var: ?[]const u8, env_var: ?[]const u8,
/// description for output in help text
description: []const u8,
default: ?generics.ResultType(), default: ?generics.ResultType(),
converter: ConverterSignature(generics), converter: ConverterSignature(generics),
description: []const u8 = "", // description for output in help text
arg_count: u32, /// number of values this option wants to consume
value_count: ValueCount,
/// the option converter will be run eagerly, before full command line
/// validation.
eager: bool, eager: bool,
/// the option cannot be omitted from the command line.
required: bool, required: bool,
exposed: bool, // do not expose the resulting value in the output type. the handler must have side effects for this option to do anything
secret: bool, // do not print help for this parameter /// this option is parsed in a pre-parsing pass that consumes it. It
nice_type_name: ?[]const u8 = null, // friendly type name (string better than []const u8) /// may be present anywhere on the command line. A different way to
/// solve this problem is by using an environment variable. It must be
/// a tagged option.
global: bool,
/// allow this named parameter to be passed multiple times.
/// values will be appended when it is encountered. If false, only the
/// final encountered instance will be used.
multi: bool,
/// if false, do not expose the resulting value in the output type.
/// the converter must have side effects for this option to do anything.
exposed: bool,
/// do not print help for this parameter
secret: bool,
nice_type_name: []const u8, // friendly type name (string better than []const u8)
/// Type of the converted value stored for this option. When `multi` is set,
/// the per-occurrence type is wrapped in an `std.ArrayList`.
pub fn ResultType(comptime self: @This()) type {
    // open question carried over from the original: is this the correct way
    // to collapse the multi / non-multi cases?
    const PerOccurrence = self._RType();
    return if (self.multi) std.ArrayList(PerOccurrence) else PerOccurrence;
}
/// Type produced by a single occurrence of this option, derived from its
/// `value_count`: the plain converter result for 0 or 1 values, a fixed-size
/// array for larger fixed counts, and an `std.ArrayList` for greedy options.
inline fn _RType(comptime self: @This()) type {
    const Single = generics.ResultType();
    return comptime switch (self.value_count) {
        .greedy => std.ArrayList(Single),
        // TODO: use an ArrayList instead? it generalizes a bit better
        // (i.e. can use the same codepath for multi-fixed and greedy)
        .fixed => |count| if (count <= 1) Single else [count]Single,
    };
}
}; };
} }
fn check_short(comptime short_tag: ?[]const u8) void { fn check_short(comptime short_tag: ?[]const u8) void {
if (short_tag) |short| { if (short_tag) |short| {
if (short.len != 2 or short[0] != '-') @compileError("bad short tag"); if (short.len != 2 or short[0] != '-') @compileError("bad short tag" ++ short);
} }
} }
fn check_long(comptime long_tag: ?[]const u8) void { fn check_long(comptime long_tag: ?[]const u8) void {
if (long_tag) |long| { if (long_tag) |long| {
if (long.len < 3 or long[0] != '-' or long[1] != '-') @compileError("bad long tag"); if (long.len < 3 or long[0] != '-' or long[1] != '-') @compileError("bad long tag" ++ long);
} }
} }
@ -183,13 +237,17 @@ fn make_option(comptime generics: ParameterGenerics, comptime opts: OptionConfig
.short_tag = opts.short_tag, .short_tag = opts.short_tag,
.long_tag = opts.long_tag, .long_tag = opts.long_tag,
.env_var = opts.env_var, .env_var = opts.env_var,
.description = opts.description,
.default = opts.default, .default = opts.default,
.converter = converter, .converter = converter,
.arg_count = opts.arg_count, .value_count = opts.value_count,
.eager = opts.eager, .eager = opts.eager,
.required = opts.required, .required = opts.required,
.multi = opts.multi,
.exposed = opts.exposed, .exposed = opts.exposed,
.global = opts.global,
.secret = opts.secret, .secret = opts.secret,
.nice_type_name = opts.nice_type_name,
}; };
} }
@ -207,18 +265,24 @@ fn make_argument(comptime generics: ParameterGenerics, comptime opts: OptionConf
@compileError("implement me"); @compileError("implement me");
}; };
if (opts.multi == true) @compileError("argument " ++ opts.name ++ " cannot be multi");
return OptionType(generics){ return OptionType(generics){
.name = opts.name, .name = opts.name,
.short_tag = opts.short_tag, .short_tag = opts.short_tag,
.long_tag = opts.long_tag, .long_tag = opts.long_tag,
.env_var = opts.env_var, .env_var = opts.env_var,
.description = opts.description,
.default = opts.default, .default = opts.default,
.converter = converter, .converter = converter,
.arg_count = opts.arg_count, .value_count = opts.value_count,
.eager = opts.eager, .eager = opts.eager,
.required = opts.required, .required = opts.required,
.multi = opts.multi,
.global = opts.global,
.exposed = opts.exposed, .exposed = opts.exposed,
.secret = opts.secret, .secret = opts.secret,
.nice_type_name = opts.nice_type_name,
}; };
} }
@ -233,12 +297,15 @@ fn FlagBuilderArgs(comptime ContextType: type) type {
truthy: ?ShortLongPair = null, truthy: ?ShortLongPair = null,
falsy: ?ShortLongPair = null, falsy: ?ShortLongPair = null,
env_var: ?[]const u8 = null, env_var: ?[]const u8 = null,
description: []const u8 = "",
default: ?bool = null, default: ?bool = null,
converter: ?ConverterSignature(flag_generics(.{ .ContextType = ContextType })) = null, converter: ?ConverterSignature(flag_generics(.{ .ContextType = ContextType })) = null,
eager: bool = false, eager: bool = false,
exposed: bool = true,
required: bool = false, required: bool = false,
global: bool = false,
multi: bool = false,
exposed: bool = true,
secret: bool = false, secret: bool = false,
}; };
} }
@ -265,10 +332,23 @@ fn CommandBuilder(comptime ContextType: type) type {
comptime Result: type, comptime Result: type,
comptime args: OptionConfig(tag_generics(.{ .ContextType = ContextType, .Result = Result })), comptime args: OptionConfig(tag_generics(.{ .ContextType = ContextType, .Result = Result })),
) void { ) void {
self.param_spec.add(make_option( const generics = tag_generics(.{ .ContextType = ContextType, .Result = Result });
tag_generics(.{ .ContextType = ContextType, .Result = Result }), if (comptime args.value_count == .fixed and args.value_count.fixed == 0) {
args, @compileError(
)); "please use add_flag rather than add_option to " ++
"create a 0-argument option",
);
}
self.param_spec.add(make_option(generics, args));
}
pub fn set_help_flag(
comptime self: *@This(),
comptime args: OptionConfig(flag_generics(.{ .ContextType = ContextType, .flag_bias = .truthy })),
) void {
_ = self;
_ = args;
} }
pub fn add_flag( pub fn add_flag(
@ -299,9 +379,14 @@ fn CommandBuilder(comptime ContextType: type) type {
.short_tag = truthy_pair.short_tag, .short_tag = truthy_pair.short_tag,
.long_tag = truthy_pair.long_tag, .long_tag = truthy_pair.long_tag,
.env_var = null, .env_var = null,
.description = build_args.description,
.value_count = .{ .fixed = 0 },
.default = build_args.default, .default = build_args.default,
.converter = build_args.converter, .converter = build_args.converter,
.eager = build_args.eager, .eager = build_args.eager,
.required = build_args.required,
.global = build_args.global,
.multi = build_args.multi,
.exposed = build_args.exposed, .exposed = build_args.exposed,
.secret = build_args.secret, .secret = build_args.secret,
}; };
@ -324,9 +409,15 @@ fn CommandBuilder(comptime ContextType: type) type {
.short_tag = falsy_pair.short_tag, .short_tag = falsy_pair.short_tag,
.long_tag = falsy_pair.long_tag, .long_tag = falsy_pair.long_tag,
.env_var = null, .env_var = null,
.description = build_args.description,
.value_count = .{ .fixed = 0 },
.default = build_args.default, .default = build_args.default,
.converter = build_args.converter, .converter = build_args.converter,
.eager = build_args.eager, .eager = build_args.eager,
.required = build_args.required,
.global = build_args.global,
.multi = build_args.multi,
.exposed = build_args.exposed,
.secret = build_args.secret, .secret = build_args.secret,
}; };
@ -337,11 +428,19 @@ fn CommandBuilder(comptime ContextType: type) type {
const generics = flag_generics(.{ .ContextType = ContextType, .flag_bias = .unbiased }); const generics = flag_generics(.{ .ContextType = ContextType, .flag_bias = .unbiased });
const args = OptionConfig(generics){ const args = OptionConfig(generics){
.name = build_args.name, .name = build_args.name,
.short_tag = null,
.long_tag = null,
.env_var = env_var, .env_var = env_var,
.description = build_args.description,
.value_count = .{ .fixed = 0 },
.default = build_args.default, .default = build_args.default,
.converter = build_args.converter, .converter = build_args.converter,
.eager = build_args.eager, .eager = build_args.eager,
.required = build_args.required,
.global = build_args.global,
.multi = build_args.multi,
.secret = build_args.secret, .secret = build_args.secret,
.exposed = build_args.exposed,
}; };
self.param_spec.add(make_option(generics, args)); self.param_spec.add(make_option(generics, args));
@ -353,10 +452,10 @@ fn CommandBuilder(comptime ContextType: type) type {
} }
pub fn CallbackSignature(comptime self: @This()) type { pub fn CallbackSignature(comptime self: @This()) type {
return *const fn (ContextType, self.CommandOutput()) anyerror!void; return *const fn (ContextType, self.Output()) anyerror!void;
} }
pub fn CommandOutput(comptime self: @This()) type { pub fn Output(comptime self: @This()) type {
comptime { comptime {
const spec = self.generate(); const spec = self.generate();
var fields: []const StructField = &[0]StructField{}; var fields: []const StructField = &[0]StructField{};
@ -380,7 +479,7 @@ fn CommandBuilder(comptime ContextType: type) type {
if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) { if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) {
if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) { if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) {
@compileError("redundant flag!!!! " ++ param.name ++ " and " ++ peek_param.name); @compileError("redundant flag!!!! " ++ param.name);
} else { } else {
bais_seen[@enumToInt(PeekType.flag_bias)] = true; bais_seen[@enumToInt(PeekType.flag_bias)] = true;
} }
@ -395,9 +494,9 @@ fn CommandBuilder(comptime ContextType: type) type {
// the optional wrapper is an interesting idea for required // the optional wrapper is an interesting idea for required
// fields. I do not foresee this greatly increasing complexity here. // fields. I do not foresee this greatly increasing complexity here.
const FieldType = if (param.required) const FieldType = if (param.required)
std.meta.Child(std.meta.FieldType(PType, .default)) param.ResultType()
else else
std.meta.FieldType(PType, .default); ?param.ResultType();
// the wacky comptime slice extension hack // the wacky comptime slice extension hack
fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{ fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{
@ -441,7 +540,7 @@ fn CommandBuilder(comptime ContextType: type) type {
if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) { if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) {
if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) { if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) {
@compileError("redundant flag!!!! " ++ param.name ++ " and " ++ peek_param.name); @compileError("redundant flag!!!! " ++ param.name);
} else { } else {
bais_seen[@enumToInt(PeekType.flag_bias)] = true; bais_seen[@enumToInt(PeekType.flag_bias)] = true;
} }
@ -452,10 +551,23 @@ fn CommandBuilder(comptime ContextType: type) type {
} }
} }
// the wacky comptime slice extension hack // This needs to be reconciled with options that take many
// arguments. We could make all of these be ArrayLists of string
// slices instead... but that makes the parsing code much more allocation heavy.
// The problem is essentially that `--long=multi,value` and `--long multi value`
// evaluate to a different number of strings for the same number of arguments.
const FieldType = switch (param.value_count) {
.fixed => |val| switch (val) {
0, 1 => []const u8,
else => std.ArrayList([]const u8),
},
else => std.ArrayList([]const u8),
};
fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{ fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{
.name = param.name, .name = param.name,
.type = ?[]const u8, .type = ?FieldType,
.default_value = @ptrCast(?*const anyopaque, &@as(?[]const u8, null)), .default_value = @ptrCast(?*const anyopaque, &@as(?[]const u8, null)),
.is_comptime = false, .is_comptime = false,
.alignment = @alignOf(?[]const u8), .alignment = @alignOf(?[]const u8),
@ -471,8 +583,45 @@ fn CommandBuilder(comptime ContextType: type) type {
} }
} }
pub fn bind(comptime self: @This(), comptime callback: self.CallbackSignature()) Parser(self, callback) { pub fn bind(
return Parser(self, callback){}; comptime self: @This(),
comptime callback: self.CallbackSignature(),
allocator: std.mem.Allocator,
) Parser(self, callback) {
return Parser(self, callback){ .allocator = allocator };
}
};
}
/// Append `value` to the optional string list stored in `intermediate.<field>`,
/// creating the list with `alloc` on first use. Fails only if the list
/// cannot grow.
fn push_unparsed_multi(comptime T: type, comptime field: []const u8, intermediate: *T, value: []const u8, alloc: std.mem.Allocator) !void {
    const slot = &@field(intermediate, field);
    if (slot.* == null) {
        // first value seen for this field: lazily create the backing list
        slot.* = std.ArrayList([]const u8).init(alloc);
    }
    try slot.*.?.append(value);
}
/// Record one raw (unconverted) string for `param` in `intermediate`.
/// Parameters with a fixed count of 0 or 1 store the string directly in the
/// field named `param.name`; every other value count appends to that field's
/// list. Any failure while appending is reported as
/// `ParseError.UnexpectedFailure`.
fn push_unparsed_value(comptime T: type, comptime param: anytype, intermediate: *T, value: []const u8, alloc: std.mem.Allocator) ParseError!void {
    // decided at comptime so that only the branch matching the field's
    // actual type is ever analyzed
    const stores_single = comptime switch (param.value_count) {
        .fixed => |count| count <= 1,
        else => false,
    };

    if (stores_single) {
        @field(intermediate, param.name) = value;
    } else {
        push_unparsed_multi(T, param.name, intermediate, value, alloc) catch return ParseError.UnexpectedFailure;
    }
}
fn ParserInterface(comptime ContextType: type) type {
return struct {
ctx: *anyopaque,
methods: *const Interface,
const Interface = struct {
execute: *const fn (ctx: *anyopaque, context: ContextType) anyerror!void,
};
pub fn execute(self: @This(), context: ContextType) anyerror!void {
return try self.methods.execute(self.ctx, context);
} }
}; };
} }
@ -480,72 +629,140 @@ fn CommandBuilder(comptime ContextType: type) type {
// the parser is generated by the bind method of the CommandBuilder, so we can // the parser is generated by the bind method of the CommandBuilder, so we can
// be extremely type-sloppy here, which simplifies the signature. // be extremely type-sloppy here, which simplifies the signature.
fn Parser(comptime command: anytype, comptime callback: anytype) type { fn Parser(comptime command: anytype, comptime callback: anytype) type {
_ = callback;
return struct { return struct {
const ContextType = @TypeOf(command).UserContextType; const ContextType = @TypeOf(command).UserContextType;
// let there be fields! we can move some things to runtime. const Intermediate = command.Intermediate();
// We can get some better behavior if we defer converting non-eager const Output = command.Output();
// options until the entire command line has been parsed. However,
// to do that, we effectively have to store the parameters as strings until the
// entire line has been parsed.
// a goal is to intermediate: Intermediate = .{},
output: Output = undefined,
intermediate: command.Intermediate() = .{},
consumed_args: u32 = 0, consumed_args: u32 = 0,
progname: ?[]const u8 = null,
has_global_tags: bool = false,
allocator: std.mem.Allocator,
// pub fn add_subcommand(self: *@This(), verb: []const u8, parser: anytype) void { // pub fn add_subcommand(self: *@This(), verb: []const u8, parser: anytype) void {
// self.subcommands // self.subcommands
// } // }
/// Wrap this parser in a type-erased `ParserInterface` whose `execute`
/// entry dispatches to `wrap_execute`.
pub fn interface(self: *@This()) ParserInterface(ContextType) {
    const Erased = ParserInterface(ContextType);
    return Erased{
        .ctx = self,
        .methods = &.{ .execute = wrap_execute },
    };
}
/// Type-erased trampoline for `execute`: casts the opaque `ctx` pointer back
/// to this parser type and forwards the call.
fn wrap_execute(ctx: *anyopaque, context: ContextType) anyerror!void {
    const Self = @This();
    const aligned = @alignCast(@alignOf(Self), ctx);
    const parser = @ptrCast(*Self, aligned);
    try parser.execute(context);
}
/// Read the process arguments, parse them into `self.intermediate`, invoke
/// the bound callback with `self.output`, then dump the intermediate values
/// to stderr via `std.debug.print`.
///
/// Returns `ParseError.EmptyArgs` when the argument list is empty, and
/// propagates any error from allocation, parsing or the callback.
pub fn execute(self: *@This(), context: ContextType) anyerror!void {
const args = try std.process.argsAlloc(self.allocator);
// NOTE(review): `args` is released when this function returns, but
// `self.progname` (and presumably the strings `parse` stores in
// `self.intermediate`) still point into it afterwards -- confirm nothing
// reads them once `execute` has returned.
defer std.process.argsFree(self.allocator, args);
// the environment map is fetched and released here but is not otherwise
// used in this function
var env = try std.process.getEnvMap(self.allocator);
defer env.deinit();
if (args.len < 1) return ParseError.EmptyArgs;
// the first argument is kept as the program name; the rest are parsed
self.progname = args[0];
try self.parse(args[1..]);
// run eager conversions
// try self.convert_eager()
// run normal conversions
// try self.convert()
// execute callback:
// NOTE(review): the conversion steps above are still commented out, so
// nothing in this function assigns `self.output` before it is passed on
// -- verify before relying on the callback's input.
try callback(context, self.output);
// debug dump: print every intermediate field by name
inline for (@typeInfo(@TypeOf(self.intermediate)).Struct.fields) |field| {
// @compileLog(@typeName(field.type));
// list-valued fields are recognised by the prefix of their type name
if (comptime std.mem.startsWith(u8, @typeName(field.type), "?array_list.ArrayList")) {
if (@field(self.intermediate, field.name)) |list| {
std.debug.print("{s}: [\n", .{field.name});
for (list.items) |item| std.debug.print(" {s},\n", .{item});
std.debug.print("]\n", .{});
} else {
std.debug.print("{s}: null\n", .{field.name});
}
} else {
// all remaining fields are printed as optional strings
std.debug.print("{s}: {?s}\n", .{ field.name, @field(self.intermediate, field.name) });
}
}
}
pub fn parse( pub fn parse(
self: *@This(), self: *@This(),
alloc: std.mem.Allocator, args: [][:0]u8,
argit: *std.process.ArgIterator,
env: std.process.EnvMap,
context: ContextType,
) anyerror!void { ) anyerror!void {
_ = alloc; // actually: don't consider env variables until performing conversions. This
// _ = context; // is the most reasonable way to treat the environment as a
// separate "namespace" for e.g. multi options. we only want to use
// environment values if there is nothing specified on the CLI, which cannot
// be determined until the CLI parsing is complete.
try self.read_environment(env); // try self.read_environment(env);
// run pre-parse pass if we have any global parameters
// try self.preparse()
var forced_ordinal = false;
var argit = ncmeta.SliceIterator(@TypeOf(args)).wrap(args);
// there are a LOT of different parsing strategies that can be adopted to
// handle "incorrect" command lines. For example, a --long-style named
// argument could be parsed as an ordered argument if it doesn't match any
// of the specified tag names. However, if the user has not passed `--`
// then it's more likely the erroneous flag is a typo or some other
// erroneous input and should be treated as such. Similarly, handling the
// pair `--long-style --some-value`. if long_style takes one value,
// should --some-value be treated as the value, or should we assume the
// user forgot the value and is specifying a second tag? Getting too clever
// with context (e.g. checking if --some-value is a known tag name)
// probably also violates the principle of least astonishment, as if it
// doesn't match, it could very likely be a typo or other erroneous input.
// In this case we have an out, sort of, as --long-style=--some-value is
// unambiguous in purpose. However, this approach misses for short flags,
// unless we also support a -l=--some-value syntax, which I don't like and
// don't think is a common convention. In this case, I think it is
// reasonable to consume the value without getting fancy,
// e.g. -l --some-value produces 'long_style: "--some-value"'. Odds are, if
// the command line was specified incorrectly, the error will cascade
// through somewhere.
// another consideration is how to deal with mixed --named and positional
// arguments. Theoretically, fixed quantity positional arguments can be
// unambiguously interspersed with named arguments, but that feels sloppy.
// If a positional argument needs to start with --, we have the -- argument
// to force positional parsing.
var forced_args = false;
argloop: while (argit.next()) |arg| { argloop: while (argit.next()) |arg| {
if (!forced_args and std.mem.eql(u8, arg, "--")) { if (!forced_ordinal and std.mem.eql(u8, arg, "--")) {
forced_args = true; forced_ordinal = true;
continue :argloop; continue :argloop;
} }
parse_tags: { if (!forced_ordinal and arg.len > 1 and arg[0] == '-') {
if (forced_args or arg.len < 1 or arg[0] != '-') break :parse_tags;
if (arg.len > 2 and arg[1] == '-') { if (arg.len > 2 and arg[1] == '-') {
try self.parse_long_tag(arg, argit, context); try self.parse_long_tag(arg, &argit);
continue :argloop; continue :argloop;
} else if (arg.len > 1) { } else if (arg.len > 1) {
for (arg[1..], 1..) |short, idx| { for (arg[1..], 1..) |short, idx| {
// _ = short; try self.parse_short_tag(short, arg.len - idx - 1, &argit);
// _ = idx;
try self.parse_short_tag(short, arg.len - idx - 1, argit, context);
} }
continue :argloop; continue :argloop;
} }
// if we've fallen through to here then we will be parsing ordinals
// exclusively from here on out.
forced_ordinal = true;
} }
try self.parse_argument(arg, argit); try self.parse_ordinals(arg, &argit);
} }
} }
inline fn parse_long_tag( inline fn parse_long_tag(
self: *@This(), self: *@This(),
arg: []const u8, arg: []const u8,
argit: *std.process.ArgIterator, argit: *ncmeta.SliceIterator([][:0]u8),
context: ContextType,
) ParseError!void { ) ParseError!void {
_ = context;
inline for (comptime command.generate()) |param| { inline for (comptime command.generate()) |param| {
const PType = @TypeOf(param); const PType = @TypeOf(param);
// removing the comptime here causes the compiler to die // removing the comptime here causes the compiler to die
@ -554,23 +771,28 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type {
if (comptime PType.is_flag) { if (comptime PType.is_flag) {
if (std.mem.eql(u8, arg, tag)) { if (std.mem.eql(u8, arg, tag)) {
@field(self.intermediate, param.name) = if (comptime PType.flag_bias == .truthy) "true" else "false"; try self.apply_param_values(param, comptime PType.flag_bias.string(), argit);
return; return;
} }
} else { } else {
if (std.mem.startsWith(u8, arg, tag)) match: { if (std.mem.startsWith(u8, arg, tag)) match: {
// TODO: handle more than one value // TODO: in case of --long=value we should split value
const next = if (arg.len == tag.len) // on comma, so e.g. --long=one,two which is kinda docker-style.
argit.next() orelse return ParseError.ValueMissing // This adds complexity. Note that --long=one,two will also
else if (arg[tag.len] == '=') // parse as a single value because we take a different
arg[tag.len + 1 ..] // codepath. In that case presumably the converter will choke if
else // it needs to. Ideally the multi-value stuff would all be
break :match; // shoved into the converter layer, but we can't do that due to
// needing to consume multiple argv values in some cases. This
// could be an opportunity to become opinionated about CLI flag
// styles, but I will not do that for the time being.
if (arg.len == tag.len) {
const next = argit.next() orelse return ParseError.ValueMissing;
try self.apply_param_values(param, next, argit);
} else if (arg[tag.len] == '=') {
try self.apply_fused_values(param, arg[tag.len + 1 ..]);
} else break :match;
@field(self.intermediate, param.name) = next;
// if (comptime param.eager) {
// try param.converter()
// }
return; return;
} }
} }
@ -583,11 +805,8 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type {
self: *@This(), self: *@This(),
arg: u8, arg: u8,
remaining: usize, remaining: usize,
argit: *std.process.ArgIterator, argit: *ncmeta.SliceIterator([][:0]u8),
context: ContextType,
) ParseError!void { ) ParseError!void {
_ = context;
inline for (comptime command.generate()) |param| { inline for (comptime command.generate()) |param| {
const PType = @TypeOf(param); const PType = @TypeOf(param);
// removing the comptime here causes the compiler to die // removing the comptime here causes the compiler to die
@ -596,15 +815,14 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type {
if (comptime PType.is_flag) { if (comptime PType.is_flag) {
if (arg == tag[1]) { if (arg == tag[1]) {
@field(self.intermediate, param.name) = if (comptime PType.flag_bias == .truthy) "true" else "false"; try self.apply_param_values(param, comptime PType.flag_bias.string(), argit);
return; return;
} }
} else { } else {
if (arg == tag[1]) { if (arg == tag[1]) {
if (remaining > 0) return ParseError.FusedShortTagValueMissing; if (remaining > 0) return ParseError.FusedShortTagValueMissing;
const next = argit.next() orelse return ParseError.ValueMissing; const next = argit.next() orelse return ParseError.ValueMissing;
try self.apply_param_values(param, next, argit);
@field(self.intermediate, param.name) = next;
return; return;
} }
} }
@ -613,27 +831,98 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type {
return ParseError.UnknownShortTagParameter; return ParseError.UnknownShortTagParameter;
} }
/// Hand a positional argument to the ordinal parameter whose position equals
/// the number of ordinal arguments consumed so far.
///
/// `arg` is the argument that was just read; `argit` is forwarded to
/// `apply_param_values` so that it can pull further values from the argument
/// list. When no ordinal parameter is left for this position the function
/// returns without doing anything.
inline fn parse_ordinals(
    self: *@This(),
    arg: []const u8,
    argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
    // Position of the current ordinal parameter within the command. It only
    // advances for ordinal parameters, so options and flags do not shift it.
    comptime var arg_index: u32 = 0;
    inline for (comptime command.generate()) |param| {
        comptime if (@TypeOf(param).param_type != .Ordinal) continue;

        if (self.consumed_args == arg_index) {
            // All value storage goes through apply_param_values; the
            // intermediate field is not assigned directly here.
            try self.apply_param_values(param, arg, argit);
            self.consumed_args += 1;
            return;
        }
        arg_index += 1;
    }

    // look for subcommands now
}
/// Store `value` for `param`, then pull any further values the parameter
/// expects from `argit`.
///
/// - `.fixed` with a count of 0 or 1: only `value` is stored.
/// - `.fixed` with a larger count: the remaining values are read from `argit`;
///   `ParseError.ValueMissing` is returned if the iterator runs out first.
/// - `.greedy`: every value still left in `argit` is stored.
inline fn apply_param_values(
    self: *@This(),
    comptime param: anytype,
    value: []const u8,
    argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
    // The value that triggered this call is always recorded first.
    try push_unparsed_value(Intermediate, param, &self.intermediate, value, self.allocator);

    switch (comptime param.value_count) {
        .fixed => |count| switch (count) {
            // Nothing beyond the initial value is expected.
            0, 1 => {},
            else => |total| {
                // One value has been stored already, so counting starts at 1.
                var stored: u32 = 1;
                while (stored < total) : (stored += 1) {
                    const extra = argit.next() orelse return ParseError.ValueMissing;
                    try push_unparsed_value(Intermediate, param, &self.intermediate, extra, self.allocator);
                }
            },
        },
        .greedy => {
            while (argit.next()) |extra| {
                try push_unparsed_value(Intermediate, param, &self.intermediate, extra, self.allocator);
            }
        },
    }
}
/// Store the value part of a fused `--tag=value` argument for `param`.
///
/// - `.fixed` with a count of 0: returns `ParseError.UnexpectedValue`.
/// - `.fixed` with a count of 1: `value` is stored whole, without splitting.
/// - `.fixed` with a larger count: `value` is split on "," and each piece is
///   stored. Too few pieces yields `ParseError.ValueMissing`; too many yields
///   `ParseError.UnexpectedValue`.
/// - `.greedy`: `value` is split on "," and every piece is stored, with no
///   count check.
inline fn apply_fused_values(self: *@This(), comptime param: anytype, value: []const u8) ParseError!void {
    switch (comptime param.value_count) {
        .fixed => |count| switch (count) {
            0 => return ParseError.UnexpectedValue,
            1 => try push_unparsed_value(Intermediate, param, &self.intermediate, value, self.allocator),
            else => |total| {
                var seen: u32 = 0;
                var iterator = std.mem.split(u8, value, ",");
                while (iterator.next()) |next| {
                    // Reject as soon as the limit is exceeded instead of
                    // storing every surplus piece before failing.
                    if (seen == total) return ParseError.UnexpectedValue;
                    try push_unparsed_value(Intermediate, param, &self.intermediate, next, self.allocator);
                    seen += 1;
                }
                if (seen < total) return ParseError.ValueMissing;
            },
        },
        .greedy => {
            // Same splitting as the fixed-many case, without a count check.
            var iterator = std.mem.split(u8, value, ",");
            while (iterator.next()) |next| {
                try push_unparsed_value(Intermediate, param, &self.intermediate, next, self.allocator);
            }
        },
    }
}
/// Store a value for every parameter that declares an `env_var` and whose
/// variable is present in `env`.
///
/// Parameters without an `env_var`, and variables that are not set, are
/// skipped. Any failure while storing a value is reported as
/// `ParseError.UnexpectedFailure`.
fn read_environment(self: *@This(), env: std.process.EnvMap) !void {
    inline for (comptime command.generate()) |param| {
        if (comptime param.env_var) |env_var| {
            // An unset variable must not end the scan: later parameters may
            // still have values in the environment. No control flow leaves
            // the unrolled loop here except on error.
            if (env.get(env_var)) |val| {
                push_unparsed_value(
                    Intermediate,
                    param,
                    &self.intermediate,
                    val,
                    self.allocator,
                ) catch return ParseError.UnexpectedFailure;
            }
        }
    }
}
@ -657,6 +946,13 @@ const cli = cmd: {
.short_tag = "-t", .short_tag = "-t",
.long_tag = "--test", .long_tag = "--test",
.env_var = "NOCLIP_TEST", .env_var = "NOCLIP_TEST",
.value_count = .{ .fixed = 2 },
});
cmd.add_option(Choice, .{
.name = "choice",
.short_tag = "-c",
.long_tag = "--choice",
.env_var = "NOCLIP_CHOICE",
}); });
cmd.add_flag(.{ cmd.add_flag(.{
.name = "flag", .name = "flag",
@ -664,31 +960,27 @@ const cli = cmd: {
.falsy = .{ .long_tag = "--no-flag" }, .falsy = .{ .long_tag = "--no-flag" },
.env_var = "NOCLIP_FLAG", .env_var = "NOCLIP_FLAG",
}); });
cmd.add_argument([]const u8, .{ .name = "arg" }); cmd.add_argument([]const u8, .{
cmd.add_argument([]const u8, .{ .name = "argtwo" }); .name = "arg",
// .value_count = .{ .fixed = 3 },
.value_count = .greedy,
});
break :cmd cmd; break :cmd cmd;
}; };
/// Callback bound to `cli`. It ignores both the context and the parsed
/// result and prints a fixed message via `std.debug.print`.
fn cli_handler(_: void, result: cli.Output()) !void {
    _ = result;

    std.debug.print("callback is working\n", .{});
}
/// Program entry point: binds `cli_handler` to the `cli` command and runs it
/// through the parser's runtime interface.
pub fn main() !void {
    // The allocator given to the parser comes from one arena, released in
    // full when main returns.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var parser = cli.bind(cli_handler, allocator);
    const iface = parser.interface();
    // The context type is void, so an empty value is passed.
    try iface.execute({});
}

View File

@ -52,6 +52,41 @@ pub fn enum_length(comptime T: type) comptime_int {
return @typeInfo(T).Enum.fields.len; return @typeInfo(T).Enum.fields.len;
} }
/// Build a forward iterator type over an indexable value of type `T`.
///
/// The element type is `std.meta.Child(T)`. The iterator keeps a copy of the
/// wrapped value in `data` and a cursor in `index`.
pub fn SliceIterator(comptime T: type) type {
    // could be expanded to use std.meta.Elem, perhaps
    const Item = std.meta.Child(T);

    return struct {
        const Self = @This();

        index: usize,
        data: T,

        pub const InitError = error{};

        /// Create an iterator positioned at the first element of `value`.
        pub fn wrap(value: T) Self {
            return .{ .index = 0, .data = value };
        }

        /// Return the current element and advance, or null when exhausted.
        pub fn next(self: *Self) ?Item {
            const item = self.peek() orelse return null;
            self.index += 1;
            return item;
        }

        /// Return the current element without advancing, or null when
        /// exhausted.
        pub fn peek(self: *Self) ?Item {
            return if (self.index == self.data.len) null else self.data[self.index];
        }

        /// Advance past the current element; does nothing when exhausted.
        pub fn skip(self: *Self) void {
            if (self.index != self.data.len) self.index += 1;
        }
    };
}
/// Stores type-erased pointers to items in comptime extensible data structures, /// Stores type-erased pointers to items in comptime extensible data structures,
/// which allows e.g. assembling a tuple through multiple calls rather than all /// which allows e.g. assembling a tuple through multiple calls rather than all
/// at once. /// at once.