From 0fbbf341561f4b6f414446e4ae9d0fe7bd7fa9a0 Mon Sep 17 00:00:00 2001 From: torque Date: Tue, 28 Mar 2023 01:07:02 -0700 Subject: [PATCH] parser: a lot of work on multiple-value options Also work on a generic runtime parser interface for attaching subcommands. This will allow subcommands to live in a mapping or something at runtime which will simplify their use. --- source/converters.zig | 105 +++------ source/doodle.zig | 510 +++++++++++++++++++++++++++++++++--------- source/meta.zig | 35 +++ 3 files changed, 465 insertions(+), 185 deletions(-) diff --git a/source/converters.zig b/source/converters.zig index b14e544..357bca0 100644 --- a/source/converters.zig +++ b/source/converters.zig @@ -8,10 +8,7 @@ pub const ConversionError = error{ }; pub fn ConverterSignature(comptime gen: ParameterGenerics) type { - return if (gen.no_context()) - *const fn ([]const u8) ConversionError!gen.ResultType() - else - *const fn (gen.ContextType, []const u8) ConversionError!gen.ResultType(); + return *const fn (gen.ContextType, []const u8) ConversionError!gen.ResultType(); } pub fn default_converter(comptime gen: ParameterGenerics) ?ConverterSignature(gen) { @@ -28,94 +25,50 @@ pub fn default_converter(comptime gen: ParameterGenerics) ?ConverterSignature(ge } fn flag_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { - return if (gen.no_context()) - struct { - pub fn handler(input: []const u8) ConversionError!bool { - // treat an empty string as falsy - if (input.len == 0) return false; + return struct { + pub fn handler(_: gen.ContextType, input: []const u8) ConversionError!bool { + // treat an empty string as falsy + if (input.len == 0) return false; - if (input.len <= 5) { - var lowerBuf: [5]u8 = undefined; - const comp = std.ascii.lowerString(&lowerBuf, input); + if (input.len <= 5) { + var lowerBuf: [5]u8 = undefined; + const comp = std.ascii.lowerString(&lowerBuf, input); - inline for ([_][]const u8{ "false", "no", "0" }) |candidate| { - if (std.mem.eql(u8, comp, candidate)) return false; - } + inline for ([_][]const u8{ "false", "no", "0" }) |candidate| { + if (std.mem.eql(u8, comp, candidate)) return false; } - - return true; } - }.handler - else - struct { - pub fn handler(_: gen.ContextType, input: []const u8) ConversionError!bool { - // treat an empty string as falsy - if (input.len == 0) return false; - if (input.len <= 5) { - var lowerBuf: [5]u8 = undefined; - const comp = std.ascii.lowerString(&lowerBuf, input); - - inline for ([_][]const u8{ "false", "no", "0" }) |candidate| { - if (std.mem.eql(u8, comp, candidate)) return false; - } - } - - return true; - } - }.handler; + return true; + } + }.handler; } fn string_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { - return if (gen.no_context()) - struct { - pub fn handler(value: []const u8) ConversionError![]const u8 { - return value; - } - }.handler - else - struct { - pub fn handler(_: gen.ContextType, value: []const u8) ConversionError![]const u8 { - return value; - } - }.handler; + return struct { + pub fn handler(_: gen.ContextType, value: []const u8) ConversionError![]const u8 { + return value; + } + }.handler; } fn int_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { const IntType = gen.ResultType(); + comptime std.debug.assert(@typeInfo(IntType) == .Int); - std.debug.assert(switch (@typeInfo(IntType)) { - .Int => true, - else => false, - }); - - return if (gen.no_context()) - struct { - pub fn handler(value: []const u8) ConversionError!IntType { - return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue; - } - }.handler - else - struct { - pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!IntType { - return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue; - } - }.handler; + return struct { + pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!IntType { + return std.fmt.parseInt(IntType, value, 0) catch return ConversionError.BadValue; + } + }.handler; } fn choice_converter(comptime gen: ParameterGenerics) ConverterSignature(gen) { const EnumType = gen.ResultType(); - return if (gen.no_context()) - struct { - pub fn handler(value: []const u8) ConversionError!EnumType { - return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue; - } - }.handler - else - struct { - pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!EnumType { - return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue; - } - }.handler; + return struct { + pub fn handler(_: gen.ContextType, value: []const u8) ConversionError!EnumType { + return std.meta.stringToEnum(gen.ResultType(), value) orelse ConversionError.BadValue; + } + }.handler; } diff --git a/source/doodle.zig b/source/doodle.zig index de6d5b0..13a352d 100644 --- a/source/doodle.zig +++ b/source/doodle.zig @@ -21,21 +21,35 @@ const Errors = error{ }; const ParseError = error{ + UnexpectedFailure, + EmptyArgs, ValueMissing, + UnexpectedValue, FusedShortTagValueMissing, UnknownLongTagParameter, UnknownShortTagParameter, }; +// in theory, we could also have a flexible value count, which could be followed by +// any number of fixed args and be well-defined. `mv` is a classic example +// of this pattern. But putting that logic in the parser seems to add a lot of +// complexity for little gain. The `mv` use case can be much more easily handled +// with a greedy value and then splitting in the value handler. +const ValueCount = union(enum) { + fixed: u32, + greedy: void, +}; + const FlagBias = enum { falsy, truthy, unbiased, - pub fn string(self: @This()) []const u8 { + pub fn string(comptime self: @This()) []const u8 { return switch (self) { .truthy => "true", - else => @compileLog(self), + .falsy => "false", + else => @compileError("flag tag with unbiased bias?"), }; } }; @@ -58,13 +72,6 @@ pub const ParameterGenerics = struct { return self.result == .flag; } - pub fn clone(comptime self: @This(), comptime NewResult: type) @This() { - return @This(){ - .ContextType = self.ContextType, - .result = .{ .Value = NewResult }, - }; - } - pub fn ResultType(comptime self: @This()) type { return switch (self.result) { .Value => |res| res, @@ -108,11 +115,15 @@ fn OptionConfig(comptime generics: ParameterGenerics) type { long_tag: ?[]const u8 = null, env_var: ?[]const u8 = null, + value_count: ValueCount = if (generics.is_flag()) .{ .fixed = 0 } else .{ .fixed = 1 }, default: ?generics.ResultType() = null, converter: ?ConverterSignature(generics) = null, - arg_count: u32 = if (generics.is_flag()) 0 else 1, + description: []const u8 = "", // description for output in help text + eager: bool = false, required: bool = generics.param_type == .Ordinal, + global: bool = false, + multi: bool = false, exposed: bool = true, secret: bool = false, nice_type_name: []const u8 = @typeName(generics.ResultType()), @@ -121,37 +132,80 @@ fn OptionConfig(comptime generics: ParameterGenerics) type { fn OptionType(comptime generics: ParameterGenerics) type { return struct { - pub const gen = generics; pub const param_type: ParameterType = generics.param_type; pub const is_flag: bool = generics.is_flag(); - pub const flag_bias: FlagBias = if (generics.is_flag()) generics.result.flag else .unbiased; + pub const flag_bias: FlagBias = if (is_flag) generics.result.flag else .unbiased; name: []const u8, short_tag: ?[]const u8, long_tag: ?[]const u8, env_var: ?[]const u8, + /// description for output in help text + description: []const u8, default: ?generics.ResultType(), converter: ConverterSignature(generics), - description: []const u8 = "", // description for output in help text - arg_count: u32, + + /// number of values this option wants to consume + value_count: ValueCount, + + /// the option converter will be run eagerly, before full command line + /// validation. eager: bool, + + /// the option cannot be omitted from the command line. required: bool, - exposed: bool, // do not expose the resulting value in the output type. the handler must have side effects for this option to do anything - secret: bool, // do not print help for this parameter - nice_type_name: ?[]const u8 = null, // friendly type name (string better than []const u8) + + /// this option is parsed in a pre-parsing pass that consumes it. It + /// may be present anywhere on the command line. A different way to + /// solve this problem is by using an environment variable. It must be + /// a tagged option. + global: bool, + /// allow this named parameter to be passed multiple times. + /// values will be appended when it is encountered. If false, only the + /// final encountered instance will be used. + multi: bool, + /// if false, do not expose the resulting value in the output type. + /// the converter must have side effects for this option to do anything. + exposed: bool, + /// do not print help for this parameter + secret: bool, + + nice_type_name: []const u8, // friendly type name (string better than []const u8) + + pub fn ResultType(comptime self: @This()) type { + // is this the correct way to collapse this? + return comptime if (self.multi) + std.ArrayList(self._RType()) + else + self._RType(); + } + + inline fn _RType(comptime self: @This()) type { + comptime switch (self.value_count) { + .fixed => |count| { + return switch (count) { + 0, 1 => generics.ResultType(), + // TODO: use an ArrayList instead? it generalizes a bit better + // (i.e. can use the same codepath for multi-fixed and greedy) + else => [count]generics.ResultType(), + }; + }, + .greedy => return std.ArrayList(generics.ResultType()), + }; + } }; } fn check_short(comptime short_tag: ?[]const u8) void { if (short_tag) |short| { - if (short.len != 2 or short[0] != '-') @compileError("bad short tag"); + if (short.len != 2 or short[0] != '-') @compileError("bad short tag" ++ short); } } fn check_long(comptime long_tag: ?[]const u8) void { if (long_tag) |long| { - if (long.len < 3 or long[0] != '-' or long[1] != '-') @compileError("bad long tag"); + if (long.len < 3 or long[0] != '-' or long[1] != '-') @compileError("bad long tag" ++ long); } } @@ -183,13 +237,17 @@ fn make_option(comptime generics: ParameterGenerics, comptime opts: OptionConfig .short_tag = opts.short_tag, .long_tag = opts.long_tag, .env_var = opts.env_var, + .description = opts.description, .default = opts.default, .converter = converter, - .arg_count = opts.arg_count, + .value_count = opts.value_count, .eager = opts.eager, .required = opts.required, + .multi = opts.multi, .exposed = opts.exposed, + .global = opts.global, .secret = opts.secret, + .nice_type_name = opts.nice_type_name, }; } @@ -207,18 +265,24 @@ fn make_argument(comptime generics: ParameterGenerics, comptime opts: OptionConf @compileError("implement me"); }; + if (opts.multi == true) @compileError("argument " ++ opts.name ++ " cannot be multi"); + return OptionType(generics){ .name = opts.name, .short_tag = opts.short_tag, .long_tag = opts.long_tag, .env_var = opts.env_var, + .description = opts.description, .default = opts.default, .converter = converter, - .arg_count = opts.arg_count, + .value_count = opts.value_count, .eager = opts.eager, .required = opts.required, + .multi = opts.multi, + .global = opts.global, .exposed = opts.exposed, .secret = opts.secret, + .nice_type_name = opts.nice_type_name, }; } @@ -233,12 +297,15 @@ fn FlagBuilderArgs(comptime ContextType: type) type { truthy: ?ShortLongPair = null, falsy: ?ShortLongPair = null, env_var: ?[]const u8 = null, + description: []const u8 = "", default: ?bool = null, converter: ?ConverterSignature(flag_generics(.{ .ContextType = ContextType })) = null, eager: bool = false, - exposed: bool = true, required: bool = false, + global: bool = false, + multi: bool = false, + exposed: bool = true, secret: bool = false, }; } @@ -265,10 +332,23 @@ fn CommandBuilder(comptime ContextType: type) type { comptime Result: type, comptime args: OptionConfig(tag_generics(.{ .ContextType = ContextType, .Result = Result })), ) void { - self.param_spec.add(make_option( - tag_generics(.{ .ContextType = ContextType, .Result = Result }), - args, - )); + const generics = tag_generics(.{ .ContextType = ContextType, .Result = Result }); + if (comptime args.value_count == .fixed and args.value_count.fixed == 0) { + @compileError( + "please use add_flag rather than add_option to " ++ + "create a 0-argument option", + ); + } + + self.param_spec.add(make_option(generics, args)); + } + + pub fn set_help_flag( + comptime self: *@This(), + comptime args: OptionConfig(flag_generics(.{ .ContextType = ContextType, .flag_bias = .truthy })), + ) void { + _ = self; + _ = args; } pub fn add_flag( @@ -299,9 +379,14 @@ fn CommandBuilder(comptime ContextType: type) type { .short_tag = truthy_pair.short_tag, .long_tag = truthy_pair.long_tag, .env_var = null, + .description = build_args.description, + .value_count = .{ .fixed = 0 }, .default = build_args.default, .converter = build_args.converter, .eager = build_args.eager, + .required = build_args.required, + .global = build_args.global, + .multi = build_args.multi, .exposed = build_args.exposed, .secret = build_args.secret, }; @@ -324,9 +409,15 @@ fn CommandBuilder(comptime ContextType: type) type { .short_tag = falsy_pair.short_tag, .long_tag = falsy_pair.long_tag, .env_var = null, + .description = build_args.description, + .value_count = .{ .fixed = 0 }, .default = build_args.default, .converter = build_args.converter, .eager = build_args.eager, + .required = build_args.required, + .global = build_args.global, + .multi = build_args.multi, + .exposed = build_args.exposed, .secret = build_args.secret, }; @@ -337,11 +428,19 @@ fn CommandBuilder(comptime ContextType: type) type { const generics = flag_generics(.{ .ContextType = ContextType, .flag_bias = .unbiased }); const args = OptionConfig(generics){ .name = build_args.name, + .short_tag = null, + .long_tag = null, .env_var = env_var, + .description = build_args.description, + .value_count = .{ .fixed = 0 }, .default = build_args.default, .converter = build_args.converter, .eager = build_args.eager, + .required = build_args.required, + .global = build_args.global, + .multi = build_args.multi, .secret = build_args.secret, + .exposed = build_args.exposed, }; self.param_spec.add(make_option(generics, args)); @@ -353,10 +452,10 @@ fn CommandBuilder(comptime ContextType: type) type { } pub fn CallbackSignature(comptime self: @This()) type { - return *const fn (ContextType, self.CommandOutput()) anyerror!void; + return *const fn (ContextType, self.Output()) anyerror!void; } - pub fn CommandOutput(comptime self: @This()) type { + pub fn Output(comptime self: @This()) type { comptime { const spec = self.generate(); var fields: []const StructField = &[0]StructField{}; @@ -380,7 +479,7 @@ fn CommandBuilder(comptime ContextType: type) type { if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) { if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) { - @compileError("redundant flag!!!! " ++ param.name ++ " and " ++ peek_param.name); + @compileError("redundant flag!!!! " ++ param.name); } else { bais_seen[@enumToInt(PeekType.flag_bias)] = true; } @@ -395,9 +494,9 @@ fn CommandBuilder(comptime ContextType: type) type { // the optional wrapper is an interesting idea for required // fields. I do not foresee this greatly increasing complexity here. const FieldType = if (param.required) - std.meta.Child(std.meta.FieldType(PType, .default)) + param.ResultType() else - std.meta.FieldType(PType, .default); + ?param.ResultType(); // the wacky comptime slice extension hack fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{ @@ -441,7 +540,7 @@ fn CommandBuilder(comptime ContextType: type) type { if (PeekType.is_flag and std.mem.eql(u8, param.name, peek_param.name)) { if (bais_seen[@enumToInt(PeekType.flag_bias)] == true) { - @compileError("redundant flag!!!! " ++ param.name ++ " and " ++ peek_param.name); + @compileError("redundant flag!!!! " ++ param.name); } else { bais_seen[@enumToInt(PeekType.flag_bias)] = true; } @@ -452,10 +551,23 @@ fn CommandBuilder(comptime ContextType: type) type { } } - // the wacky comptime slice extension hack + // This needs to be reconciled with options that take many + // arguments. We could make all of these be ArrayLists of string + // slices instead... but that makes the parsing code much more allocation heavy. + // The problem is essentially that `--long=multi,value` and `--long multi value` + // evaluate to a different number of strings for the same number of arguments. + + const FieldType = switch (param.value_count) { + .fixed => |val| switch (val) { + 0, 1 => []const u8, + else => std.ArrayList([]const u8), + }, + else => std.ArrayList([]const u8), + }; + fields = &(@as([fields.len]StructField, fields[0..fields.len].*) ++ [1]StructField{.{ .name = param.name, - .type = ?[]const u8, + .type = ?FieldType, .default_value = @ptrCast(?*const anyopaque, &@as(?[]const u8, null)), .is_comptime = false, .alignment = @alignOf(?[]const u8), @@ -471,8 +583,45 @@ fn CommandBuilder(comptime ContextType: type) type { } } - pub fn bind(comptime self: @This(), comptime callback: self.CallbackSignature()) Parser(self, callback) { - return Parser(self, callback){}; + pub fn bind( + comptime self: @This(), + comptime callback: self.CallbackSignature(), + allocator: std.mem.Allocator, + ) Parser(self, callback) { + return Parser(self, callback){ .allocator = allocator }; + } + }; +} + +fn push_unparsed_multi(comptime T: type, comptime field: []const u8, intermediate: *T, value: []const u8, alloc: std.mem.Allocator) !void { + if (@field(intermediate, field) == null) { + @field(intermediate, field) = std.ArrayList([]const u8).init(alloc); + } + + try @field(intermediate, field).?.append(value); +} + +fn push_unparsed_value(comptime T: type, comptime param: anytype, intermediate: *T, value: []const u8, alloc: std.mem.Allocator) ParseError!void { + switch (comptime param.value_count) { + .fixed => |val| switch (val) { + 0, 1 => @field(intermediate, param.name) = value, + else => push_unparsed_multi(T, param.name, intermediate, value, alloc) catch return ParseError.UnexpectedFailure, + }, + else => push_unparsed_multi(T, param.name, intermediate, value, alloc) catch return ParseError.UnexpectedFailure, + } +} + +fn ParserInterface(comptime ContextType: type) type { + return struct { + ctx: *anyopaque, + methods: *const Interface, + + const Interface = struct { + execute: *const fn (ctx: *anyopaque, context: ContextType) anyerror!void, + }; + + pub fn execute(self: @This(), context: ContextType) anyerror!void { + return try self.methods.execute(self.ctx, context); } }; } @@ -480,72 +629,140 @@ fn CommandBuilder(comptime ContextType: type) type { // the parser is generated by the bind method of the CommandBuilder, so we can // be extremely type-sloppy here, which simplifies the signature. fn Parser(comptime command: anytype, comptime callback: anytype) type { - _ = callback; - return struct { const ContextType = @TypeOf(command).UserContextType; - // let there be fields! we can move some things to runtime. - // We can get some better behavior if we defer converting non-eager - // options until the entire command line has been parsed. However, - // to do that, we effectively have to store the parameters as strings until the - // entire line has been parsed. + const Intermediate = command.Intermediate(); + const Output = command.Output(); - // a goal is to - - intermediate: command.Intermediate() = .{}, + intermediate: Intermediate = .{}, + output: Output = undefined, consumed_args: u32 = 0, + progname: ?[]const u8 = null, + has_global_tags: bool = false, + allocator: std.mem.Allocator, // pub fn add_subcommand(self: *@This(), verb: []const u8, parser: anytype) void { // self.subcommands // } + pub fn interface(self: *@This()) ParserInterface(ContextType) { + return .{ .ctx = self, .methods = &.{ .execute = wrap_execute } }; + } + + fn wrap_execute(ctx: *anyopaque, context: ContextType) anyerror!void { + const self = @ptrCast(*@This(), @alignCast(@alignOf(@This()), ctx)); + return try self.execute(context); + } + + pub fn execute(self: *@This(), context: ContextType) anyerror!void { + const args = try std.process.argsAlloc(self.allocator); + defer std.process.argsFree(self.allocator, args); + var env = try std.process.getEnvMap(self.allocator); + defer env.deinit(); + + if (args.len < 1) return ParseError.EmptyArgs; + + self.progname = args[0]; + try self.parse(args[1..]); + // run eager conversions + // try self.convert_eager() + // run normal conversions + // try self.convert() + // execute callback: + try callback(context, self.output); + + inline for (@typeInfo(@TypeOf(self.intermediate)).Struct.fields) |field| { + // @compileLog(@typeName(field.type)); + if (comptime std.mem.startsWith(u8, @typeName(field.type), "?array_list.ArrayList")) { + if (@field(self.intermediate, field.name)) |list| { + std.debug.print("{s}: [\n", .{field.name}); + for (list.items) |item| std.debug.print(" {s},\n", .{item}); + std.debug.print("]\n", .{}); + } else { + std.debug.print("{s}: null\n", .{field.name}); + } + } else { + std.debug.print("{s}: {?s}\n", .{ field.name, @field(self.intermediate, field.name) }); + } + } + } + pub fn parse( self: *@This(), - alloc: std.mem.Allocator, - argit: *std.process.ArgIterator, - env: std.process.EnvMap, - context: ContextType, + args: [][:0]u8, ) anyerror!void { - _ = alloc; - // _ = context; + // actually: don't consider env variables until performing conversions. This + // is the most reasonable way to treat the environment as a + // separate "namespace" for e.g. multi options. we only want to use + // environment values if there is nothing specified on the CLI, which cannot + // be determined until the CLI parsing is complete. - try self.read_environment(env); + // try self.read_environment(env); + + // run pre-parse pass if we have any global parameters + // try self.preparse() + + var forced_ordinal = false; + var argit = ncmeta.SliceIterator(@TypeOf(args)).wrap(args); + + // there are a LOT of different parsing strategies that can be adopted to + // handle "incorrect" command lines. For example, a --long-style named + // argument could be parsed as an ordered argument if it doesn't match any + // of the specified tag names. However, if the user has not passed `--` + // then it's more likely the erroneous flag is a typo or some other + // erroneous input and should be treated as such. Similarly, handling the + // pair `--long-style --some-value`. if long_style takes one value, + // should --some-value be treated as the value, or should we assume the + // user forgot the value and is specifying a second tag? Getting too clever + // with context (e.g. checking if --some-value is a known tag name) + // probably also violates the principle of least astonishment, as if it + // doesn't match, it could very likely be a typo or other erroneous input. + // In this case we have an out, sort of, as --long-style=--some-value is + // unambiguous in purpose. However, this approach misses for short flags, + // unless we also support a -l=--some-value syntax, which I don't like and + // don't think is a common convention. In this case, I think it is + // reasonable to consume the value without getting fancy, + // e.g. -l --some-value produces 'long_style: "--some-value"'. Odds are, if + // the command line was specified incorrectly, the error will cascade + // through somewhere. + + // another consideration is how to deal with mixed --named and positional + // arguments. Theoretically, fixed quantity positional arguments can be + // unambiguously interspersed with named arguments, but that feels sloppy. + // If a positional argument needs to start with --, we have the -- argument + // to force positional parsing. - var forced_args = false; argloop: while (argit.next()) |arg| { - if (!forced_args and std.mem.eql(u8, arg, "--")) { - forced_args = true; + if (!forced_ordinal and std.mem.eql(u8, arg, "--")) { + forced_ordinal = true; continue :argloop; } - parse_tags: { - if (forced_args or arg.len < 1 or arg[0] != '-') break :parse_tags; - + if (!forced_ordinal and arg.len > 1 and arg[0] == '-') { if (arg.len > 2 and arg[1] == '-') { - try self.parse_long_tag(arg, argit, context); + try self.parse_long_tag(arg, &argit); continue :argloop; } else if (arg.len > 1) { for (arg[1..], 1..) |short, idx| { - // _ = short; - // _ = idx; - try self.parse_short_tag(short, arg.len - idx - 1, argit, context); + try self.parse_short_tag(short, arg.len - idx - 1, &argit); } continue :argloop; } + + // if we've fallen through to here then we will be parsing ordinals + // exclusively from here on out. + forced_ordinal = true; } - try self.parse_argument(arg, argit); + try self.parse_ordinals(arg, &argit); } } inline fn parse_long_tag( self: *@This(), arg: []const u8, - argit: *std.process.ArgIterator, - context: ContextType, + argit: *ncmeta.SliceIterator([][:0]u8), ) ParseError!void { - _ = context; - inline for (comptime command.generate()) |param| { const PType = @TypeOf(param); // removing the comptime here causes the compiler to die @@ -554,23 +771,28 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type { if (comptime PType.is_flag) { if (std.mem.eql(u8, arg, tag)) { - @field(self.intermediate, param.name) = if (comptime PType.flag_bias == .truthy) "true" else "false"; + try self.apply_param_values(param, comptime PType.flag_bias.string(), argit); return; } } else { if (std.mem.startsWith(u8, arg, tag)) match: { - // TODO: handle more than one value - const next = if (arg.len == tag.len) - argit.next() orelse return ParseError.ValueMissing - else if (arg[tag.len] == '=') - arg[tag.len + 1 ..] - else - break :match; + // TODO: in case of --long=value we should split value + // on comma, so e.g. --long=one,two which is kinda docker-style. + // This adds complexity. Note that --long=one,two will also + // parse as a single value because we take a different + // codepath. In that case presumably the converter will choke if + // it needs to. Ideally the multi-value stuff would all be + // shoved into the converter layer, but we can't do that due to + // needing to consume multiple argv values in some cases. This + // could be an opportunity to become opinionated about CLI flag + // styles, but I will not do that for the time being. + if (arg.len == tag.len) { + const next = argit.next() orelse return ParseError.ValueMissing; + try self.apply_param_values(param, next, argit); + } else if (arg[tag.len] == '=') { + try self.apply_fused_values(param, arg[tag.len + 1 ..]); + } else break :match; - @field(self.intermediate, param.name) = next; - // if (comptime param.eager) { - // try param.converter() - // } return; } } @@ -583,11 +805,8 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type { self: *@This(), arg: u8, remaining: usize, - argit: *std.process.ArgIterator, - context: ContextType, + argit: *ncmeta.SliceIterator([][:0]u8), ) ParseError!void { - _ = context; - inline for (comptime command.generate()) |param| { const PType = @TypeOf(param); // removing the comptime here causes the compiler to die @@ -596,15 +815,14 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type { if (comptime PType.is_flag) { if (arg == tag[1]) { - @field(self.intermediate, param.name) = if (comptime PType.flag_bias == .truthy) "true" else "false"; + try self.apply_param_values(param, comptime PType.flag_bias.string(), argit); return; } } else { if (arg == tag[1]) { if (remaining > 0) return ParseError.FusedShortTagValueMissing; const next = argit.next() orelse return ParseError.ValueMissing; - - @field(self.intermediate, param.name) = next; + try self.apply_param_values(param, next, argit); return; } } @@ -613,27 +831,98 @@ fn Parser(comptime command: anytype, comptime callback: anytype) type { return ParseError.UnknownShortTagParameter; } - inline fn parse_argument(self: *@This(), arg: []const u8, argit: *std.process.ArgIterator) ParseError!void { - _ = argit; - + inline fn parse_ordinals( + self: *@This(), + arg: []const u8, + argit: *ncmeta.SliceIterator([][:0]u8), + ) ParseError!void { comptime var arg_index: u32 = 0; inline for (comptime command.generate()) |param| { - if (@TypeOf(param).param_type != .Ordinal) continue; + comptime if (@TypeOf(param).param_type != .Ordinal) continue; if (self.consumed_args == arg_index) { - std.debug.print("n: {s}, c: {d}, i: {d}\n", .{ param.name, self.consumed_args, arg_index }); - @field(self.intermediate, param.name) = arg; + try self.apply_param_values(param, arg, argit); self.consumed_args += 1; return; } + arg_index += 1; } + + // look for subcommands now + } + + inline fn apply_param_values(self: *@This(), comptime param: anytype, value: []const u8, argit: *ncmeta.SliceIterator([][:0]u8)) ParseError!void { + try push_unparsed_value(Intermediate, param, &self.intermediate, value, self.allocator); + switch (comptime param.value_count) { + .fixed => |count| switch (count) { + 0, 1 => return, + else => |total| { + var consumed: u32 = 1; + while (consumed < total) : (consumed += 1) { + const next = argit.next() orelse return ParseError.ValueMissing; + try push_unparsed_value( + Intermediate, + param, + &self.intermediate, + next, + self.allocator, + ); + } + }, + }, + .greedy => { + while (argit.next()) |next| { + try push_unparsed_value( + Intermediate, + param, + &self.intermediate, + next, + self.allocator, + ); + } + }, + } + } + + inline fn apply_fused_values(self: *@This(), comptime param: anytype, value: []const u8) ParseError!void { + switch (comptime param.value_count) { + .fixed => |count| switch (count) { + 0 => return ParseError.UnexpectedValue, + 1 => try push_unparsed_value(Intermediate, param, &self.intermediate, value, self.allocator), + else => |total| { + var seen: u32 = 0; + var iterator = std.mem.split(u8, value, ","); + while (iterator.next()) |next| { + try push_unparsed_value(Intermediate, param, &self.intermediate, next, self.allocator); + seen += 1; + } + if (seen < total) return ParseError.ValueMissing else if (seen > total) return ParseError.UnexpectedValue; + }, + }, + .greedy => { + // huh. this is just an unchecked version of the fixed-many case. + var iterator = std.mem.split(u8, value, ","); + while (iterator.next()) |next| { + try push_unparsed_value(Intermediate, param, &self.intermediate, next, self.allocator); + } + }, + } } fn read_environment(self: *@This(), env: std.process.EnvMap) !void { inline for (comptime command.generate()) |param| { if (comptime param.env_var) |env_var| { - @field(self.intermediate, param.name) = env.get(env_var); + const val = env.get(env_var) orelse return; + + push_unparsed_value( + Intermediate, + param, + &self.intermediate, + val, + self.allocator, + ) catch return ParseError.UnexpectedFailure; + return; } } } @@ -657,6 +946,13 @@ const cli = cmd: { .short_tag = "-t", .long_tag = "--test", .env_var = "NOCLIP_TEST", + .value_count = .{ .fixed = 2 }, + }); + cmd.add_option(Choice, .{ + .name = "choice", + .short_tag = "-c", + .long_tag = "--choice", + .env_var = "NOCLIP_CHOICE", }); cmd.add_flag(.{ .name = "flag", @@ -664,31 +960,27 @@ const cli = cmd: { .falsy = .{ .long_tag = "--no-flag" }, .env_var = "NOCLIP_FLAG", }); - cmd.add_argument([]const u8, .{ .name = "arg" }); - cmd.add_argument([]const u8, .{ .name = "argtwo" }); + cmd.add_argument([]const u8, .{ + .name = "arg", + // .value_count = .{ .fixed = 3 }, + .value_count = .greedy, + }); break :cmd cmd; }; -fn cli_handler(_: void, result: cli.CommandOutput()) !void { +fn cli_handler(_: void, result: cli.Output()) !void { _ = result; + + std.debug.print("callback is working\n", .{}); } pub fn main() !void { - // std.debug.print("hello\n", .{}); - var parser = cli.bind(cli_handler); - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena.deinit(); - const allocator = arena.allocator(); - var argit = try std.process.argsWithAllocator(allocator); - var env = try std.process.getEnvMap(allocator); - _ = argit.next(); - try parser.parse(allocator, &argit, env, {}); - - inline for (@typeInfo(@TypeOf(parser.intermediate)).Struct.fields) |field| { - std.debug.print("{s}: {?s}\n", .{ field.name, @field(parser.intermediate, field.name) }); - } + var parser = cli.bind(cli_handler, allocator); + const iface = parser.interface(); + try iface.execute({}); } diff --git a/source/meta.zig b/source/meta.zig index 028cff1..a0e885a 100644 --- a/source/meta.zig +++ b/source/meta.zig @@ -52,6 +52,41 @@ pub fn enum_length(comptime T: type) comptime_int { return @typeInfo(T).Enum.fields.len; } +pub fn SliceIterator(comptime T: type) type { + // could be expanded to use std.meta.Elem, perhaps + const ResultType = std.meta.Child(T); + + return struct { + index: usize, + data: T, + + pub const InitError = error{}; + + pub fn wrap(value: T) @This() { + return @This(){ .index = 0, .data = value }; + } + + pub fn next(self: *@This()) ?ResultType { + if (self.index == self.data.len) return null; + + defer self.index += 1; + return self.data[self.index]; + } + + pub fn peek(self: *@This()) ?ResultType { + if (self.index == self.data.len) return null; + + return self.data[self.index]; + } + + pub fn skip(self: *@This()) void { + if (self.index == self.data.len) return; + + self.index += 1; + } + }; +} + /// Stores type-erased pointers to items in comptime extensible data structures, /// which allows e.g. assembling a tuple through multiple calls rather than all /// at once.