// NOCLIP/source/parser.zig
// torque c3b31b2274
// command/parser: sketch out help flag integration
// This is a special case flag that cannot be replicated with the normal
// machinery. It's much easier to special case it. So here we go.
// 2023-04-06 18:31:29 -07:00
//
// 433 lines
// 18 KiB
// Zig

const std = @import("std");
const ncmeta = @import("./meta.zig");
const errors = @import("./errors.zig");
const ParseError = errors.ParseError;
const NoclipError = errors.NoclipError;
/// Type-erased handle to a concrete parser plus its user context.
///
/// The concrete types are hidden behind `*anyopaque` pointers; every call is
/// routed through the function table in `methods`, which receives both
/// pointers back unchanged.
pub const ParserInterface = struct {
    /// Function table implemented by a concrete parser type.
    const Vtable = struct {
        execute: *const fn (parser: *anyopaque, context: *anyopaque) anyerror!void,
        parse: *const fn (parser: *anyopaque, context: *anyopaque, args: [][:0]u8, env: std.process.EnvMap) anyerror!void,
        finish: *const fn (parser: *anyopaque, context: *anyopaque) anyerror!void,
    };

    parser: *anyopaque,
    context: *anyopaque,
    methods: *const Vtable,

    /// Forward to the table's `execute` entry with the stored pointers.
    pub fn execute(iface: @This()) anyerror!void {
        try iface.methods.execute(iface.parser, iface.context);
    }

    /// Forward to the table's `parse` entry, passing `args` and `env` through.
    pub fn parse(iface: @This(), args: [][:0]u8, env: std.process.EnvMap) anyerror!void {
        try iface.methods.parse(iface.parser, iface.context, args, env);
    }

    /// Forward to the table's `finish` entry with the stored pointers.
    pub fn finish(iface: @This()) anyerror!void {
        try iface.methods.finish(iface.parser, iface.context);
    }
};
/// Build a namespace containing a single `interface` method for `ParserType`.
///
/// When `UserContext` is void the generated method takes only the parser and
/// supplies a pointer to a void value as the context; otherwise the caller
/// passes a context pointer explicitly. In both cases the returned
/// `ParserInterface` dispatches to `ParserType`'s `wrap_*` functions.
fn InterfaceGen(comptime ParserType: type, comptime UserContext: type) type {
    return switch (@typeInfo(UserContext)) {
        .Void => struct {
            pub fn interface(self: *ParserType) ParserInterface {
                return ParserInterface{
                    .methods = &.{
                        .execute = ParserType.wrap_execute,
                        .parse = ParserType.wrap_parse,
                        .finish = ParserType.wrap_finish,
                    },
                    .parser = self,
                    .context = @constCast(&void{}),
                };
            }
        },
        else => struct {
            pub fn interface(self: *ParserType, context: *UserContext) ParserInterface {
                return ParserInterface{
                    .methods = &.{
                        .execute = ParserType.wrap_execute,
                        .parse = ParserType.wrap_parse,
                        .finish = ParserType.wrap_finish,
                    },
                    .parser = self,
                    .context = context,
                };
            }
        },
    };
}
// the parser is generated by the bind method of the CommandBuilder, so we can
// be extremely type-sloppy here, which simplifies the signature.
pub fn Parser(comptime command: anytype, comptime callback: anytype) type {
const UserContext = @TypeOf(command).UserContextType;
const parameters = command.generate();
const Intermediate = command.Intermediate();
const Output = command.Output();
return struct {
intermediate: Intermediate = .{},
output: Output = undefined,
consumed_args: u32 = 0,
progname: ?[]const u8 = null,
has_global_tags: bool = false,
allocator: std.mem.Allocator,
subcommands: std.hash_map.StringHashMap(ParserInterface),
subcommand: ?ParserInterface = null,
/// Register `parser` as the handler for the subcommand named `verb`.
///
/// The mapping is stored in `self.subcommands`; any error from the map
/// insertion is returned to the caller. The string hash map stores the key
/// slice without copying it, so `verb` must outlive this parser.
pub fn add_subcommand(self: *@This(), verb: []const u8, parser: ParserInterface) !void {
    return self.subcommands.put(verb, parser);
}
// This is a slightly annoying hack to work around the fact that there's no way to
// provide a method signature conditionally.
pub usingnamespace InterfaceGen(@This(), UserContext);
/// Vtable trampoline for `execute`: recover the typed parser and user
/// context from their type-erased pointers, then call `self.execute`.
fn wrap_execute(parser: *anyopaque, ctx: *anyopaque) anyerror!void {
    // The alignment must be that of the parser struct itself. The previous
    // code used @alignOf(*@This()), i.e. the alignment of a *pointer* to the
    // parser, which disagreed with wrap_parse and wrap_finish.
    const self = @ptrCast(*@This(), @alignCast(@alignOf(@This()), parser));
    // this is a slightly annoying hack to work around the problem that void has
    // 0 alignment, which alignCast chokes on.
    const context = if (@alignOf(UserContext) > 0)
        @ptrCast(*UserContext, @alignCast(@alignOf(UserContext), ctx))
    else
        @ptrCast(*UserContext, ctx);
    return try self.execute(context);
}
/// Vtable trampoline for `parse`: restore the typed parser and user context
/// from their opaque pointers and hand the arguments to `subparse`.
fn wrap_parse(parser: *anyopaque, ctx: *anyopaque, args: [][:0]u8, env: std.process.EnvMap) anyerror!void {
    const typed_parser = @ptrCast(*@This(), @alignCast(@alignOf(@This()), parser));
    // A zero-alignment context type (void) cannot go through @alignCast, so
    // it is pointer-cast directly.
    const user_context = if (@alignOf(UserContext) == 0)
        @ptrCast(*UserContext, ctx)
    else
        @ptrCast(*UserContext, @alignCast(@alignOf(UserContext), ctx));
    try typed_parser.subparse(user_context, args, env);
}
/// Vtable trampoline for `finish`: restore the typed parser and user context
/// from their opaque pointers and call `self.finish`.
fn wrap_finish(parser: *anyopaque, ctx: *anyopaque) anyerror!void {
    const typed_parser = @ptrCast(*@This(), @alignCast(@alignOf(@This()), parser));
    // A zero-alignment context type (void) cannot go through @alignCast, so
    // it is pointer-cast directly.
    const user_context = if (@alignOf(UserContext) == 0)
        @ptrCast(*UserContext, ctx)
    else
        @ptrCast(*UserContext, @alignCast(@alignOf(UserContext), ctx));
    try typed_parser.finish(user_context);
}
/// Parse `args` for this command, fill unset parameters from `env`, run the
/// eager converters, and then let a matched subcommand parse what is left.
///
/// Every intermediate field is dumped with `std.debug.print` after the eager
/// conversion step.
pub fn subparse(self: *@This(), context: *UserContext, args: [][:0]u8, env: std.process.EnvMap) anyerror!void {
    const verb_args_start = try self.parse(args);
    try self.read_environment(env);
    try self.convert_eager(context);

    inline for (@typeInfo(@TypeOf(self.intermediate)).Struct.fields) |field| {
        if (@field(self.intermediate, field.name)) |present| {
            std.debug.print("{s}: ", .{field.name});
            self.print_value(present, "");
        } else {
            std.debug.print("{s}: null,\n", .{field.name});
        }
    }

    // Only a matched subcommand consumes the arguments after the verb.
    const verb = self.subcommand orelse return;
    try verb.parse(args[verb_args_start..], env);
}
/// Run the non-eager converters, invoke the bound callback with the
/// converted output, and then finish the matched subcommand, if any.
pub fn finish(self: *@This(), context: *UserContext) anyerror!void {
    try self.convert(context);
    try callback(context, self.output);

    const verb = self.subcommand orelse return;
    try verb.finish();
}
/// Entry point: read the process arguments and environment, parse them, and
/// run the callback chain.
///
/// Returns `ParseError.EmptyArgs` if the process argument list is empty.
/// The argument and environment storage is released before returning.
pub fn execute(self: *@This(), context: *UserContext) anyerror!void {
    const args = try std.process.argsAlloc(self.allocator);
    defer std.process.argsFree(self.allocator, args);

    var env = try std.process.getEnvMap(self.allocator);
    defer env.deinit();

    if (args.len < 1) return ParseError.EmptyArgs;

    self.progname = args[0];
    // `args` is freed when this function returns; clear the borrowed slice
    // first (defers run in reverse order) so `progname` never dangles.
    defer self.progname = null;

    try self.subparse(context, args[1..], env);
    try self.finish(context);
}
/// Debug helper: print `value` to stderr, prefixed by `indent`.
///
/// A value whose type has an `items` field is printed as a bracketed list,
/// with each element printed recursively one level deeper; anything else is
/// printed with the `{s}` format specifier.
fn print_value(self: @This(), value: anytype, comptime indent: []const u8) void {
    if (comptime !@hasField(@TypeOf(value), "items")) {
        std.debug.print("{s}{s}\n", .{ indent, value });
    } else {
        std.debug.print("{s}[\n", .{indent});
        for (value.items) |element| {
            self.print_value(element, indent ++ " ");
        }
        std.debug.print("{s}]\n", .{indent});
    }
}
/// Walk `args`, dispatching each argument to the long-tag, short-tag or
/// ordinal handler.
///
/// A bare `--` switches the rest of the command line to ordinal-only
/// parsing. Arguments starting with `--` (and longer than two bytes) are
/// handled as long tags; other arguments starting with `-` are handled as a
/// cluster of short tags. Everything else goes to `parse_ordinals`.
///
/// Returns `argit.index` as soon as an argument matches a registered
/// subcommand (which is also stored in `self.subcommand`), or 0 if the
/// arguments run out without a subcommand match.
/// NOTE(review): `argit.index` is presumably the offset of the first
/// argument after the verb; confirm against ncmeta.SliceIterator.
pub fn parse(
self: *@This(),
args: [][:0]u8,
) anyerror!usize {
// run pre-parse pass if we have any global parameters
// try self.preparse()
var forced_ordinal = false;
var argit = ncmeta.SliceIterator(@TypeOf(args)).wrap(args);
// there are a LOT of different parsing strategies that can be adopted to
// handle "incorrect" command lines. For example, a --long-style named
// argument could be parsed as an ordered argument if it doesn't match any
// of the specified tag names. However, if the user has not passed `--`
// then it's more likely the erroneous flag is a typo or some other
// erroneous input and should be treated as such. Similarly, handling the
// pair `--long-style --some-value`. if long_style takes one value,
// should --some-value be treated as the value, or should we assume the
// user forgot the value and is specifying a second tag? Getting too clever
// with context (e.g. checking if --some-value is a known tag name)
// probably also violates the principle of least astonishment, as if it
// doesn't match, it could very likely be a typo or other erroneous input.
// In this case we have an out, sort of, as --long-style=--some-value is
// unambiguous in purpose. However, this approach misses for short flags,
// unless we also support a -l=--some-value syntax, which I don't like and
// don't think is a common convention. In this case, I think it is
// reasonable to consume the value without getting fancy,
// e.g. -l --some-value produces 'long_style: "--some-value"'. Odds are, if
// the command line was specified incorrectly, the error will cascade
// through somewhere.
// another consideration is how to deal with mixed --named and positional
// arguments. Theoretically, fixed quantity positional arguments can be
// unambiguously interspersed with named arguments, but that feels sloppy.
// If a positional argument needs to start with --, we have the -- argument
// to force positional parsing.
argloop: while (argit.next()) |arg| {
// the first bare `--` is consumed and disables tag parsing from here on.
if (!forced_ordinal and std.mem.eql(u8, arg, "--")) {
forced_ordinal = true;
continue :argloop;
}
if (!forced_ordinal and arg.len > 1 and arg[0] == '-') {
if (arg.len > 2 and arg[1] == '-') {
try self.parse_long_tag(arg, &argit);
continue :argloop;
} else if (arg.len > 1) {
// every byte after the leading '-' is treated as its own short tag;
// the second argument is the number of bytes left in the cluster
// after the current one.
for (arg[1..], 1..) |short, idx| {
try self.parse_short_tag(short, arg.len - idx - 1, &argit);
}
continue :argloop;
}
// NOTE(review): the enclosing condition already guarantees arg.len > 1,
// so the `else if` above is always taken and the assignment below is
// never reached. Confirm whether it was meant to sit outside this `if`.
// if we've fallen through to here then we will be parsing ordinals
// exclusively from here on out.
forced_ordinal = true;
}
if (try self.parse_ordinals(arg, &argit)) |verb| {
self.subcommand = verb;
// TODO: return slice of remaining or offset index
return argit.index;
}
}
// no subcommand matched; subparse only uses the result when one did.
return 0;
}
/// Handle one long-tag argument.
///
/// `arg` is compared against each tag as-is, so tags presumably include
/// their leading dashes (TODO confirm against the command builder). A match
/// on the help flag calls `print_help`, which exits the process. Otherwise
/// the first Nominal parameter whose long tag matches either the whole of
/// `arg` or its prefix up to an `=` receives the value(s): taken from
/// `argit` in the first case, from the text after `=` in the second.
///
/// Returns `ParseError.UnknownLongTagParameter` if no parameter matches.
inline fn parse_long_tag(
self: *@This(),
arg: []const u8,
argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
if (comptime command.help_flag.long_tag) |long|
if (std.mem.eql(u8, arg, long))
self.print_help();
inline for (comptime parameters) |param| {
const PType = @TypeOf(param);
// removing the comptime here causes the compiler to die
comptime if (PType.param_type != .Nominal or param.long_tag == null) continue;
const tag = param.long_tag.?;
if (std.mem.startsWith(u8, arg, tag)) match: {
if (arg.len == tag.len) {
// exact match: values (if any) come from the following arguments.
try self.apply_param_values(param, argit, false);
} else if (arg[tag.len] == '=') {
// `tag=value` form: values come from the text after the `=`.
try self.apply_fused_values(param, arg[tag.len + 1 ..]);
// `tag` is only a prefix of some longer name; keep looking.
} else break :match;
return;
}
}
return ParseError.UnknownLongTagParameter;
}
/// Handle one short-tag character taken from a `-abc` style cluster.
///
/// `arg` is the tag character and `remaining` is the number of characters
/// that follow it in the same cluster. The character is compared against
/// byte 1 of each tag string, so short tags are presumably stored as "-x"
/// (TODO confirm against the command builder). A match on the help flag
/// calls `print_help`, which exits the process.
///
/// Returns `ParseError.FusedShortTagValueMissing` when a parameter that is
/// not a flag is matched with characters still following it in the cluster,
/// and `ParseError.UnknownShortTagParameter` if no parameter matches.
inline fn parse_short_tag(
self: *@This(),
arg: u8,
remaining: usize,
argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!void {
if (comptime command.help_flag.short_tag) |short|
if (arg == short[1])
self.print_help();
inline for (comptime parameters) |param| {
const PType = @TypeOf(param);
// removing the comptime here causes the compiler to die
comptime if (PType.param_type != .Nominal or param.short_tag == null) continue;
const tag = param.short_tag.?;
if (arg == tag[1]) {
// a non-flag parameter is only accepted as the last character of a cluster.
if (comptime !PType.is_flag)
if (remaining > 0)
return ParseError.FusedShortTagValueMissing;
try self.apply_param_values(param, argit, false);
return;
}
}
return ParseError.UnknownShortTagParameter;
}
/// Assign the current argument to the next unfilled Ordinal parameter, or
/// resolve it as a subcommand verb once all ordinals are filled.
///
/// `self.consumed_args` counts the ordinals filled so far. When an ordinal
/// is filled, null is returned. When every ordinal is already filled, `arg`
/// is looked up in `self.subcommands`; a hit returns that parser interface
/// and a miss returns `ParseError.ExtraValue`.
inline fn parse_ordinals(
self: *@This(),
arg: []const u8,
argit: *ncmeta.SliceIterator([][:0]u8),
) ParseError!?ParserInterface {
comptime var arg_index: u32 = 0;
inline for (comptime parameters) |param| {
comptime if (@TypeOf(param).param_type != .Ordinal) continue;
if (self.consumed_args == arg_index) {
// NOTE(review): presumably steps the iterator back so that `arg` is
// read again by apply_param_values; confirm in ncmeta.
argit.rewind();
if (comptime @TypeOf(param).G.multi) {
// a multi-valued ordinal takes everything until peek() returns null.
while (argit.peek()) |_| try self.apply_param_values(param, argit, false);
} else {
try self.apply_param_values(param, argit, false);
}
self.consumed_args += 1;
return null;
}
arg_index += 1;
}
return self.subcommands.get(arg) orelse ParseError.ExtraValue;
}
/// Store one raw value for `param` in the intermediate struct.
///
/// Multi-valued parameters append to a list that is created on first use.
/// All other parameters overwrite the previous value; for non-scalar
/// parameters the previous list, if any, is deinitialized first.
///
/// Returns `ParseError.UnexpectedFailure` if appending to the list fails.
inline fn push_intermediate_value(
    self: *@This(),
    comptime param: anytype,
    // @TypeOf(param).G.IntermediateValue() should work but appears to trigger a
    // compiler bug: expected pointer, found 'u1'
    value: param.IntermediateValue(),
) ParseError!void {
    const gen = @TypeOf(param).G;
    if (comptime gen.multi) {
        if (@field(self.intermediate, param.name) == null)
            @field(self.intermediate, param.name) = gen.IntermediateType().init(self.allocator);

        @field(self.intermediate, param.name).?.append(value) catch
            return ParseError.UnexpectedFailure;
    } else {
        if (comptime gen.nonscalar()) {
            // release the list being replaced before overwriting it.
            if (@field(self.intermediate, param.name)) |stale| stale.deinit();
        }
        @field(self.intermediate, param.name) = value;
    }
}
/// Pull the values `param` expects out of `argit` and store them.
///
/// - flag: stores the parameter's flag-bias string; nothing is read.
/// - count: increments the stored counter; nothing is read.
/// - fixed 0: always `ParseError.ExtraValue`.
/// - fixed 1: reads one value, or returns `ParseError.MissingValue`.
/// - fixed N: reads exactly N values into a list. When `bounded` is set
///   (the iterator covers a closed set of values, e.g. a fused
///   `tag=a,b,c`), any value left over yields `ParseError.ExtraValue`.
///
/// `argit` may be any iterator whose `next()` returns an optional string.
/// Allocation failures are reported as `ParseError.UnexpectedFailure`.
inline fn apply_param_values(
    self: *@This(),
    comptime param: anytype,
    argit: anytype,
    bounded: bool,
) ParseError!void {
    switch (comptime @TypeOf(param).G.value_count) {
        .flag => try self.push_intermediate_value(param, comptime param.flag_bias.string()),
        .count => @field(self.intermediate, param.name) += 1,
        .fixed => |count| switch (count) {
            0 => return ParseError.ExtraValue,
            1 => try self.push_intermediate_value(param, argit.next() orelse return ParseError.MissingValue),
            else => |total| {
                var list = std.ArrayList([]const u8).initCapacity(self.allocator, total) catch
                    return ParseError.UnexpectedFailure;
                // Ownership only passes to the intermediate struct when the
                // push below succeeds; release the list on every error exit
                // so that it does not leak.
                errdefer list.deinit();

                var consumed: u32 = 0;
                while (consumed < total) : (consumed += 1) {
                    const next = argit.next() orelse return ParseError.MissingValue;
                    list.append(next) catch return ParseError.UnexpectedFailure;
                }
                if (bounded and argit.next() != null) return ParseError.ExtraValue;
                try self.push_intermediate_value(param, list);
            },
        },
    }
}
/// Apply a comma-separated `value` string to `param`.
///
/// The string is split on "," and the pieces are handed to
/// `apply_param_values` in bounded mode.
inline fn apply_fused_values(
    self: *@This(),
    comptime param: anytype,
    value: []const u8,
) ParseError!void {
    var pieces = std.mem.split(u8, value, ",");
    try self.apply_param_values(param, &pieces, true);
}
/// Fill still-unset parameters from their environment variables.
///
/// Only parameters that declare an `env_var` and whose intermediate value is
/// still null are considered. Flag parameters store the variable's text
/// directly; all others go through the comma-splitting fused-value path.
fn read_environment(self: *@This(), env: std.process.EnvMap) !void {
    inline for (comptime parameters) |param| {
        if (comptime param.env_var) |env_var| {
            // a value that is already set takes precedence over the environment.
            if (@field(self.intermediate, param.name) == null) {
                if (env.get(env_var)) |text| {
                    if (comptime @TypeOf(param).G.value_count == .flag) {
                        try self.push_intermediate_value(param, text);
                    } else {
                        try self.apply_fused_values(param, text);
                    }
                }
            }
        }
    }
}
/// Convert every parameter marked `eager`, in declaration order.
fn convert_eager(self: *@This(), context: *UserContext) NoclipError!void {
    inline for (comptime parameters) |param| {
        if (comptime param.eager) try self.convert_param(param, context);
    }
}
/// Convert every parameter that is not marked `eager`, in declaration order.
fn convert(self: *@This(), context: *UserContext) NoclipError!void {
    inline for (comptime parameters) |param| {
        if (comptime !param.eager) try self.convert_param(param, context);
    }
}
/// Produce the output field for `param`.
///
/// If an intermediate value was collected, it is run through the parameter's
/// converter. Otherwise a required parameter yields
/// `ParseError.RequiredParameterMissing`, a parameter with a default gets
/// that default, and any other parameter is set to null.
fn convert_param(self: *@This(), comptime param: anytype, context: *UserContext) NoclipError!void {
    if (@field(self.intermediate, param.name)) |raw| {
        @field(self.output, param.name) = try param.converter(context, raw);
    } else if (comptime param.required) {
        return ParseError.RequiredParameterMissing;
    } else if (comptime param.default) |fallback| {
        // The default must be written explicitly: self.output starts out as
        // undefined, which clobbers the default declared on the field.
        @field(self.output, param.name) = fallback;
    } else {
        @field(self.output, param.name) = null;
    }
}
/// Placeholder help output: print a fixed message to stderr and exit the
/// process with status 0. The parser instance is not used.
fn print_help(_: @This()) void {
    std.debug.print("help!!!\n", .{});
    std.process.exit(0);
}
};
}