NOCLIP/source/parser.zig
torque 8ac610ae71
command, parser: try to clean up UserContext type handling
This is a feeble attempt to unify some logic, as I realized that
Command.createInterface had different logic for handling the user
context than Parser did, which broke certain use cases (using a slice
as the context for example).

I'm not convinced this really unifies the logic as much as wraps it in
another layer of indirection, but at least the core problem is solved.
2023-09-10 14:50:44 -07:00

571 lines
23 KiB
Zig

const std = @import("std");
const errors = @import("./errors.zig");
const help = @import("./help.zig");
const ncmeta = @import("./meta.zig");
const ParseError = errors.ParseError;
const NoclipError = errors.NoclipError;
/// Type-erased handle to a concrete parser. It bundles an opaque parser
/// pointer, an opaque user-context pointer, and a table of functions that know
/// how to cast both back to their real types. This lets parsers generated for
/// different commands be stored side by side (for example in a `CommandMap`).
pub const ParserInterface = struct {
    const Self = @This();

    /// Function table filled in by `create` from the `_wrap*` functions of the
    /// concrete parser type.
    const Vtable = struct {
        execute: *const fn (parser: *anyopaque, context: *anyopaque) anyerror!void,
        parse: *const fn (parser: *anyopaque, context: *anyopaque, name: []const u8, args: [][:0]u8, env: std.process.EnvMap) anyerror!void,
        finish: *const fn (parser: *anyopaque, context: *anyopaque) anyerror!void,
        addSubcommand: *const fn (parser: *anyopaque, name: []const u8, subcommand: ParserInterface) std.mem.Allocator.Error!void,
        getSubcommand: *const fn (parser: *anyopaque, name: []const u8) ?ParserInterface,
        describe: *const fn () []const u8,
        deinit: *const fn (parser: *anyopaque) void,
        deinitTree: *const fn (parser: *anyopaque) void,
    };

    parser: *anyopaque,
    context: *anyopaque,
    methods: *const Vtable,

    /// Build an interface for `parser`, wiring every table entry to the
    /// matching `_wrap*` declaration of `ParserType`.
    fn create(comptime ParserType: type, parser: *anyopaque, context: *anyopaque) Self {
        return .{
            .parser = parser,
            .context = context,
            .methods = &.{
                .execute = ParserType._wrapExecute,
                .parse = ParserType._wrapParse,
                .finish = ParserType._wrapFinish,
                .addSubcommand = ParserType._wrapAddSubcommand,
                .getSubcommand = ParserType._wrapGetSubcommand,
                .describe = ParserType._wrapDescribe,
                .deinit = ParserType._wrapDeinit,
                .deinitTree = ParserType._wrapDeinitTree,
            },
        };
    }

    /// Forward to the parser's `execute` entry with the stored context.
    pub fn execute(self: Self) anyerror!void {
        return self.methods.execute(self.parser, self.context);
    }

    /// Forward `name`, `args` and `env` to the parser's `parse` entry with the
    /// stored context.
    pub fn parse(self: Self, name: []const u8, args: [][:0]u8, env: std.process.EnvMap) anyerror!void {
        return self.methods.parse(self.parser, self.context, name, args, env);
    }

    /// Forward to the parser's `finish` entry with the stored context.
    pub fn finish(self: Self) anyerror!void {
        return self.methods.finish(self.parser, self.context);
    }

    /// Register `subcommand` under `name` on the underlying parser.
    pub fn addSubcommand(self: Self, name: []const u8, subcommand: ParserInterface) std.mem.Allocator.Error!void {
        return self.methods.addSubcommand(self.parser, name, subcommand);
    }

    /// Look up a subcommand by name on the underlying parser.
    pub fn getSubcommand(self: Self, name: []const u8) ?ParserInterface {
        return self.methods.getSubcommand(self.parser, name);
    }

    /// Return the description string supplied by the parser type.
    pub fn describe(self: Self) []const u8 {
        return self.methods.describe();
    }

    /// Forward to the parser's `deinit` entry.
    pub fn deinit(self: Self) void {
        self.methods.deinit(self.parser);
    }

    /// Forward to the parser's `deinitTree` entry.
    pub fn deinitTree(self: Self) void {
        self.methods.deinitTree(self.parser);
    }
};
/// Maps a subcommand name to the type-erased parser registered for it.
pub const CommandMap = std.StringHashMap(ParserInterface);
// Build the parser type for `command`. `callback` is invoked by `finish` with
// the user context and the fully converted output.
//
// the parser is generated by the bind method of the CommandBuilder, so we can
// be extremely type-sloppy here, which simplifies the signature.
pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
// Type of the user context value handed to converters and to `callback`.
const UserContext = @TypeOf(command).UserContextType;
// Parameter descriptions; iterated with `inline for` by the parsing and
// conversion methods below.
const parameters = command.generate();
// Struct holding the raw values gathered while parsing, keyed by parameter name.
const Intermediate = command.Intermediate();
// Struct holding the converted values, keyed by parameter name.
const Output = command.Output();
return struct {
// Exposed to the type-erased interface through `_wrapDescribe`.
const command_description = command.description;
// Raw values collected from the command line and the environment.
intermediate: Intermediate = .{},
// Starts undefined; `convertParam` writes it one field at a time.
output: Output = undefined,
// Number of ordinal parameters that have been filled so far (see `parseOrdinals`).
consumed_args: u32 = 0,
// Set by `execute` to the basename of the first process argument.
progname: ?[]const u8 = null,
// NOTE(review): not read or written by any method visible in this struct;
// presumably reserved for the pre-parse pass mentioned in `parse` - confirm.
has_global_tags: bool = false,
// Torn down and destroyed by `deinit`.
arena: *std.heap.ArenaAllocator,
// Used for every allocation made while parsing and converting.
// NOTE(review): presumably the allocator of `arena` - confirm where the parser is constructed.
allocator: std.mem.Allocator,
// Subcommands registered through `addSubcommand`.
subcommands: CommandMap,
// Set by `parse` when an argument names a registered subcommand.
subcommand: ?ParserInterface = null,
// Produces the help text printed by `printHelp`.
help_builder: help.HelpBuilder(command),
// This is a slightly annoying hack to work around the fact that there's no way
// to provide a method signature conditionally.
pub usingnamespace InterfaceGen(@This(), @TypeOf(command).ICC);
// This is attached to the struct this way because these are all "private"
// methods that exist exclusively to cast the type-erased interface object back
// into something usable. Their implementations aren't meaningful and just
// cognitively clutter this struct.
pub usingnamespace InterfaceWrappers(@This());
/// Parse `args` for this command, fill in values from `env`, run the eager
/// converters, and then hand the remaining arguments to the selected
/// subcommand, if there is one.
///
/// `name` is the display name of this command; the subcommand receives
/// "<name> <subcommand>" as its own name. When no subcommand was selected but
/// the command requires one (and has some registered), a notice and the help
/// text are printed and the process exits via `printHelp`.
pub fn subparse(
    self: *@This(),
    context: UserContext,
    name: []const u8,
    args: [][:0]u8,
    env: std.process.EnvMap,
) anyerror!void {
    const next_index = try self.parse(name, args);
    try self.readEnvironment(env);
    try self.convertEager(context);

    const selected = self.subcommand orelse {
        if (self.subcommands.count() > 0 and command.subcommand_required) {
            const stderr = std.io.getStdErr().writer();
            try stderr.writeAll("A subcommand is required.\n\n");
            self.printHelp(name);
        }
        return;
    };

    // `parse` stops right after the subcommand's own name, so that name sits
    // one slot before the returned index.
    const name_parts = [_][]const u8{ name, args[next_index - 1] };
    const grafted_name = try std.mem.join(self.allocator, " ", &name_parts);
    try selected.parse(grafted_name, args[next_index..], env);
}
/// Convert the non-eager parameters, invoke the user callback with the
/// completed output, and then finish the selected subcommand, if any.
pub fn finish(self: *@This(), context: UserContext) anyerror!void {
    try self.convert(context);
    try callback(context, self.output);
    const selected = self.subcommand orelse return;
    try selected.finish();
}
/// Release everything allocated in the parser's arena, then destroy the arena
/// object itself through the arena's child allocator.
pub fn deinit(self: @This()) void {
    const arena = self.arena;
    const backing_allocator = arena.child_allocator;
    arena.deinit();
    backing_allocator.destroy(arena);
}
/// Tear down every registered subcommand (recursively), then this parser.
pub fn deinitTree(self: @This()) void {
    var children = self.subcommands.valueIterator();
    while (children.next()) |child| child.deinitTree();
    self.deinit();
}
/// Register `parser` as the handler for the subcommand called `name`,
/// replacing any previous entry with the same name. The map stores the `name`
/// slice itself rather than a copy, so it must outlive the parser.
pub fn addSubcommand(self: *@This(), name: []const u8, parser: ParserInterface) !void {
    return self.subcommands.put(name, parser);
}
/// Return the parser registered for `name`, or null when there is none.
pub fn getSubcommand(self: @This(), name: []const u8) ?ParserInterface {
    const registered = self.subcommands.get(name);
    return registered;
}
/// Entry point for a top-level command: read the process arguments and
/// environment, parse them (including any subcommand), and run the callbacks.
///
/// Returns `ParseError.EmptyArgs` when the process was started without even a
/// program name. The argument list and environment map are allocated with
/// `self.allocator` and are not freed here.
pub fn execute(self: *@This(), context: UserContext) anyerror!void {
    const args = try std.process.argsAlloc(self.allocator);
    // Bail out before doing any further work if there is no argv[0].
    if (args.len < 1) return ParseError.EmptyArgs;

    // The map is only read, so it must be `const`: newer compilers reject a
    // `var` that is never mutated.
    const env = try std.process.getEnvMap(self.allocator);

    const progname = std.fs.path.basename(args[0]);
    self.progname = progname;

    try self.subparse(context, progname, args[1..], env);
    try self.finish(context);
}
/// Debug helper: print `value` to stderr prefixed by `indent`. A value whose
/// type has an `items` field is printed as a bracketed list, with each element
/// printed recursively one indent level deeper.
fn printValue(self: @This(), value: anytype, comptime indent: []const u8) void {
    if (comptime !@hasField(@TypeOf(value), "items")) {
        std.debug.print("{s}{s}\n", .{ indent, value });
    } else {
        std.debug.print("{s}[\n", .{indent});
        for (value.items) |element| {
            self.printValue(element, indent ++ " ");
        }
        std.debug.print("{s}]\n", .{indent});
    }
}
/// Walk `args`, dispatching `--long` tags, `-s` short-tag clusters and
/// ordinal (positional) arguments to the matching parameters.
///
/// `name` is only used when help has to be printed. Parsing stops as soon as
/// an argument names a registered subcommand: that subcommand is stored in
/// `self.subcommand` and the iterator index just past its name is returned, so
/// the caller can slice off the remaining arguments. Returns 0 when every
/// argument was consumed without selecting a subcommand.
pub fn parse(
    self: *@This(),
    name: []const u8,
    args: [][:0]u8,
) anyerror!usize {
    // TODO: run a pre-parse pass (self.preparse) if we have any global parameters.

    // Set once a bare `--` has been seen; from then on every argument is
    // treated as an ordinal, even if it starts with '-'.
    var forced_ordinal = false;
    var argit = ncmeta.SliceIterator(@TypeOf(args)).wrap(args);

    // There are a LOT of different parsing strategies that can be adopted to
    // handle "incorrect" command lines. For example, a --long-style named
    // argument could be parsed as an ordered argument if it doesn't match any
    // of the specified tag names. However, if the user has not passed `--`
    // then it's more likely the erroneous flag is a typo or some other
    // erroneous input and should be treated as such. Similarly, handling the
    // pair `--long-style --some-value`: if long_style takes one value,
    // should --some-value be treated as the value, or should we assume the
    // user forgot the value and is specifying a second tag? Getting too clever
    // with context (e.g. checking if --some-value is a known tag name)
    // probably also violates the principle of least astonishment, as if it
    // doesn't match, it could very likely be a typo or other erroneous input.
    // In this case we have an out, sort of, as --long-style=--some-value is
    // unambiguous in purpose. However, this approach misses for short flags,
    // unless we also support a -l=--some-value syntax, which I don't like and
    // don't think is a common convention. In this case, I think it is
    // reasonable to consume the value without getting fancy,
    // e.g. -l --some-value produces 'long_style: "--some-value"'. Odds are, if
    // the command line was specified incorrectly, the error will cascade
    // through somewhere.
    //
    // Another consideration is how to deal with mixed --named and positional
    // arguments. Theoretically, fixed quantity positional arguments can be
    // unambiguously interspersed with named arguments, but that feels sloppy.
    // If a positional argument needs to start with --, we have the -- argument
    // to force positional parsing.
    argloop: while (argit.next()) |arg| {
        if (!forced_ordinal) {
            if (std.mem.eql(u8, arg, "--")) {
                forced_ordinal = true;
                continue :argloop;
            }
            if (arg.len > 1 and arg[0] == '-') {
                if (arg.len > 2 and arg[1] == '-') {
                    try self.parseLongTag(name, arg, &argit);
                } else {
                    // A cluster such as `-abc`: hand each character over along
                    // with the number of characters that still follow it.
                    for (arg[1..], 1..) |short, idx| {
                        try self.parseShortTag(name, short, arg.len - idx - 1, &argit);
                    }
                }
                continue :argloop;
            }
        }

        if (try self.parseOrdinals(arg, &argit)) |subcommand| {
            self.subcommand = subcommand;
            // TODO: return slice of remaining or offset index
            return argit.index;
        }
    }
    return 0;
}
/// Handle one `--long` argument. `arg` is the full argument text, including
/// any `=value` suffix.
///
/// If `arg` equals the command's long help tag, help is printed and the
/// process exits. Otherwise the first nominal parameter whose long tag matches
/// either takes its values from `argit` (exact match) or from the text after
/// `=` (fused form). Returns `ParseError.UnknownLongTagParameter` when nothing
/// matches.
fn parseLongTag(
    self: *@This(),
    name: []const u8,
    arg: [:0]u8,
    argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
    if (comptime command.help_flag.long_tag) |help_tag| {
        if (std.mem.eql(u8, arg, help_tag)) self.printHelp(name);
    }

    inline for (comptime parameters) |param| {
        const PType = @TypeOf(param);
        // removing the comptime here causes the compiler to die
        comptime if (PType.param_type != .Nominal or param.long_tag == null) continue;
        const tag = param.long_tag.?;

        if (std.mem.startsWith(u8, arg, tag)) {
            if (arg.len == tag.len) {
                try self.applyParamValues(param, argit, false);
                return;
            }
            if (arg[tag.len] == '=') {
                try self.applyFusedValues(param, arg[tag.len + 1 ..]);
                return;
            }
            // `tag` is merely a prefix of some longer tag: keep looking.
        }
    }

    return ParseError.UnknownLongTagParameter;
}
/// Handle a single character taken from a short-tag argument such as `-abc`.
///
/// `arg` is the tag character and `remaining` is how many characters follow it
/// inside the same argument. If `arg` matches the command's short help tag,
/// help is printed and the process exits. A parameter that is not a flag may
/// only appear last in a cluster; otherwise
/// `ParseError.FusedShortTagValueMissing` is returned. Returns
/// `ParseError.UnknownShortTagParameter` when no nominal parameter matches.
fn parseShortTag(
    self: *@This(),
    name: []const u8,
    arg: u8,
    remaining: usize,
    argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
    if (comptime command.help_flag.short_tag) |help_tag| {
        if (arg == help_tag[1]) self.printHelp(name);
    }

    inline for (comptime parameters) |param| {
        const PType = @TypeOf(param);
        // removing the comptime here causes the compiler to die
        comptime if (PType.param_type != .Nominal or param.short_tag == null) continue;
        const tag = param.short_tag.?;

        // Index 1 skips the leading '-' stored with the tag.
        if (arg == tag[1]) {
            if (comptime !PType.is_flag) {
                if (remaining > 0) return ParseError.FusedShortTagValueMissing;
            }
            try self.applyParamValues(param, argit, false);
            return;
        }
    }

    return ParseError.UnknownShortTagParameter;
}
// Treat `arg` as a positional argument.
//
// If there is still an unfilled ordinal parameter, its value(s) are read from
// `argit` and null is returned. Once every ordinal parameter has been filled,
// `arg` is looked up as a subcommand name: the matching interface is returned,
// or ParseError.ExtraValue if there is no such subcommand.
fn parseOrdinals(
self: *@This(),
arg: [:0]u8,
argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!?ParserInterface {
// Position of the current parameter among the ordinal parameters. This is a
// comptime counter because the loop below is unrolled at compile time.
comptime var arg_index: u32 = 0;
inline for (comptime parameters) |param| {
comptime if (@TypeOf(param).param_type != .Ordinal) continue;
// `consumed_args` is the runtime count of ordinals filled so far, so this
// selects the first ordinal parameter that has not been filled yet.
if (self.consumed_args == arg_index) {
// `arg` was already taken from the iterator by the caller.
// NOTE(review): presumably rewind() steps back one slot so that
// applyParamValues reads `arg` again - confirm in meta.zig.
argit.rewind();
if (comptime @TypeOf(param).G.multi) {
// A multi-valued ordinal takes every argument that is left.
while (argit.peek()) |_| try self.applyParamValues(param, argit, false);
} else {
try self.applyParamValues(param, argit, false);
}
self.consumed_args += 1;
return null;
}
arg_index += 1;
}
// Every ordinal parameter is filled: the argument must name a subcommand.
return self.subcommands.get(arg) orelse ParseError.ExtraValue;
}
/// Store one raw `value` for `param` in `self.intermediate`.
///
/// For a multi-valued parameter the value is appended to a list that is
/// created on first use; a failed append is reported as
/// `ParseError.UnexpectedFailure`. For any other parameter the value replaces
/// whatever was stored before; a previously stored non-scalar value is
/// deinitialized first.
fn pushIntermediateValue(
    self: *@This(),
    comptime param: anytype,
    // @TypeOf(param).G.IntermediateValue() should work but appears to trigger a
    // compiler bug: expected pointer, found 'u1'
    value: param.IntermediateValue(),
) ParseError!void {
    const gen = @TypeOf(param).G;
    const slot = &@field(self.intermediate, param.name);

    if (comptime gen.multi) {
        if (slot.* == null) {
            slot.* = gen.IntermediateType().init(self.allocator);
        }
        slot.*.?.append(value) catch return ParseError.UnexpectedFailure;
    } else {
        if (comptime gen.nonscalar()) {
            if (slot.*) |previous| previous.deinit();
        }
        slot.* = value;
    }
}
/// Read as many values from `argit` as `param` expects and store them.
///
/// - flag: stores the string form of the parameter's flag bias; nothing is
///   read from `argit`.
/// - count: increments the stored counter; nothing is read from `argit`.
/// - fixed 0: always `ParseError.ExtraValue`.
/// - fixed 1: stores the next value, or `ParseError.MissingValue`.
/// - fixed N: collects exactly N values into a list. When `bounded` is set,
///   the iterator must be exhausted afterwards, otherwise
///   `ParseError.ExtraValue` is returned.
///
/// `argit` is any iterator whose `next()` returns an optional value.
fn applyParamValues(
    self: *@This(),
    comptime param: anytype,
    argit: anytype,
    bounded: bool,
) ParseError!void {
    switch (comptime @TypeOf(param).G.value_count) {
        .flag => try self.pushIntermediateValue(param, comptime param.flag_bias.string()),
        .count => @field(self.intermediate, param.name) += 1,
        .fixed => |count| switch (count) {
            0 => return ParseError.ExtraValue,
            1 => {
                const single = argit.next() orelse return ParseError.MissingValue;
                try self.pushIntermediateValue(param, single);
            },
            else => |total| {
                var values = std.ArrayList([:0]const u8).initCapacity(self.allocator, total) catch
                    return ParseError.UnexpectedFailure;

                var outstanding: u32 = total;
                while (outstanding > 0) : (outstanding -= 1) {
                    const item = argit.next() orelse return ParseError.MissingValue;
                    values.append(item) catch return ParseError.UnexpectedFailure;
                }

                if (bounded and argit.next() != null) return ParseError.ExtraValue;
                try self.pushIntermediateValue(param, values);
            },
        },
    }
}
/// Apply the values packed into a single string (the part after `=` in
/// `--tag=a,b`, or the contents of an environment variable). `value` is split
/// on ',' and the pieces are fed to `applyParamValues` in bounded mode.
fn applyFusedValues(
    self: *@This(),
    comptime param: anytype,
    value: [:0]u8,
) ParseError!void {
    var splitter = ncmeta.MutatingZSplitter(u8){ .buffer = value, .delimiter = ',' };
    try self.applyParamValues(param, &splitter, true);
}
/// Fill in parameters from their environment variables.
///
/// A parameter is only touched when it declares an `env_var`, has no value
/// stored yet, and that variable is present in `env`. The variable's text is
/// copied with `self.allocator`; flags store the copy directly, every other
/// parameter has it split like a fused `--tag=a,b` value.
fn readEnvironment(self: *@This(), env: std.process.EnvMap) !void {
    inline for (comptime parameters) |param| {
        if (comptime param.env_var) |env_var| blk: {
            // A value that is already stored takes precedence.
            if (@field(self.intermediate, param.name) != null) break :blk;

            const raw = env.get(env_var) orelse break :blk;
            const owned = self.allocator.dupeZ(u8, raw) catch
                return ParseError.UnexpectedFailure;

            if (comptime @TypeOf(param).G.value_count == .flag) {
                try self.pushIntermediateValue(param, owned);
            } else {
                try self.applyFusedValues(param, owned);
            }
        }
    }
}
/// Run `convertParam` for every parameter marked `eager`.
fn convertEager(self: *@This(), context: UserContext) NoclipError!void {
    inline for (comptime parameters) |param| {
        if (comptime param.eager) try self.convertParam(param, context);
    }
}
/// Run `convertParam` for every parameter that is not marked `eager`.
fn convert(self: *@This(), context: UserContext) NoclipError!void {
    inline for (comptime parameters) |param| {
        if (comptime !param.eager) try self.convertParam(param, context);
    }
}
/// Turn the raw value stored for `param` into its final form.
///
/// When a raw value is present, the parameter's converter is called with the
/// user context, the raw value and a writer into a scratch buffer. On failure
/// the buffer's contents are printed to stderr and the converter's error is
/// returned. Converters for parameters with an output field have their result
/// stored in `self.output`.
///
/// When no raw value is present: a required parameter yields
/// `ParseError.RequiredParameterMissing`; otherwise a parameter with an output
/// field receives its default, or null if it has none.
fn convertParam(self: *@This(), comptime param: anytype, context: UserContext) NoclipError!void {
    const intermediate = @field(self.intermediate, param.name) orelse {
        if (comptime param.required) {
            return ParseError.RequiredParameterMissing;
        } else if (comptime @TypeOf(param).has_output) {
            if (comptime param.default) |def| {
                // this has to be explicitly set because even though we set it as
                // the field default, it gets clobbered because self.output is
                // initialized as undefined.
                @field(self.output, param.name) = def;
            } else {
                @field(self.output, param.name) = null;
            }
        }
        return;
    };

    // Scratch space the converter can use to describe what went wrong.
    var message_buffer = std.ArrayList(u8).init(self.allocator);
    const writer = message_buffer.writer();

    if (comptime @TypeOf(param).has_output) {
        @field(self.output, param.name) = param.converter(context, intermediate, writer) catch |err| {
            const stderr = std.io.getStdErr().writer();
            stderr.print("Error parsing option \"{s}\": {s}\n", .{ param.name, message_buffer.items }) catch {};
            return err;
        };
    } else {
        param.converter(context, intermediate, writer) catch |err| {
            const stderr = std.io.getStdErr().writer();
            stderr.print("Error parsing option \"{s}\": {s}\n", .{ param.name, message_buffer.items }) catch {};
            return err;
        };
    }
}
/// Write the help text for this command (shown under `name`) to stderr and
/// terminate the process with exit status 0. If the help text cannot be built
/// a short notice is written instead. Failures while writing are ignored.
fn printHelp(self: *@This(), name: []const u8) noreturn {
    const stderr = std.io.getStdErr().writer();
    if (self.help_builder.buildMessage(name, self.subcommands)) |message| {
        stderr.writeAll(message) catch {};
    } else |_| {
        stderr.writeAll("There was a problem generating the help.") catch {};
    }
    std.process.exit(0);
}
};
}
/// Generate the adapter functions that `ParserInterface.create` places in its
/// function table for `ParserType`. Each adapter casts the opaque parser
/// pointer back to `*ParserType` (and, where needed, recovers the user context
/// through `castContext`) before calling the real method.
fn InterfaceWrappers(comptime ParserType: type) type {
    return struct {
        /// Recover the concrete parser pointer from its type-erased form.
        inline fn castInterfaceParser(parser: *anyopaque) *ParserType {
            const typed: *ParserType = @ptrCast(@alignCast(parser));
            return typed;
        }

        fn _wrapExecute(parser: *anyopaque, ctx: *anyopaque) anyerror!void {
            const typed = castInterfaceParser(parser);
            try typed.execute(typed.castContext(ctx));
        }

        fn _wrapParse(
            parser: *anyopaque,
            ctx: *anyopaque,
            name: []const u8,
            args: [][:0]u8,
            env: std.process.EnvMap,
        ) anyerror!void {
            const typed = castInterfaceParser(parser);
            try typed.subparse(typed.castContext(ctx), name, args, env);
        }

        fn _wrapFinish(parser: *anyopaque, ctx: *anyopaque) anyerror!void {
            const typed = castInterfaceParser(parser);
            try typed.finish(typed.castContext(ctx));
        }

        fn _wrapAddSubcommand(parser: *anyopaque, name: []const u8, subcommand: ParserInterface) !void {
            return castInterfaceParser(parser).addSubcommand(name, subcommand);
        }

        fn _wrapGetSubcommand(parser: *anyopaque, name: []const u8) ?ParserInterface {
            return castInterfaceParser(parser).getSubcommand(name);
        }

        fn _wrapDeinit(parser: *anyopaque) void {
            castInterfaceParser(parser).deinit();
        }

        fn _wrapDeinitTree(parser: *anyopaque) void {
            castInterfaceParser(parser).deinitTree();
        }

        fn _wrapDescribe() []const u8 {
            return ParserType.command_description;
        }
    };
}
// Generate the `interface` constructor and the matching `castContext` helper
// for `ParserType`, in one of three shapes selected by `ICC`. `interface`
// erases the user context into the `*anyopaque` stored in a ParserInterface;
// `castContext` turns that opaque pointer back into the value handed to the
// parser's methods.
// NOTE(review): `ICC` presumably describes how the user context type is
// passed (none / by pointer / by value); its definition is not in this file.
// TODO: figure out a better way of consolidating this logic with that in command.zig?
fn InterfaceGen(comptime ParserType: type, comptime ICC: anytype) type {
return switch (ICC) {
// No context argument: `interface` takes only the parser and stores a pointer
// to a void value; `castContext` ignores the pointer and returns void.
.empty => struct {
pub fn interface(self: *ParserType) ParserInterface {
return ParserInterface.create(ParserType, self, @constCast(&void{}));
}
fn castContext(_: ParserType, _: *anyopaque) void {
return void{};
}
},
// The supplied context is stored as the opaque pointer (with constness cast
// away) and cast back to `ICC.OutputType()` on the way out.
.pointer => struct {
pub fn interface(self: *ParserType, context: ICC.InputType().?) ParserInterface {
return ParserInterface.create(ParserType, self, @constCast(context));
}
fn castContext(_: ParserType, ctx: *anyopaque) ICC.OutputType() {
return @ptrCast(@alignCast(ctx));
}
},
// The supplied context pointer is stored as the opaque pointer and
// dereferenced in `castContext`, so the parser's methods receive the
// pointed-to value. The pointee is not copied by `interface`.
.value => struct {
pub fn interface(self: *ParserType, context: ICC.InputType().?) ParserInterface {
return ParserInterface.create(ParserType, self, @ptrCast(@constCast(context)));
}
fn castContext(_: ParserType, ctx: *anyopaque) ICC.OutputType() {
return @as(ICC.InputType().?, @ptrCast(@alignCast(ctx))).*;
}
},
};
}