From 4690f0b808fa6429cc4a0033d4b24d655cbec145 Mon Sep 17 00:00:00 2001 From: torque Date: Wed, 18 Oct 2023 21:34:07 -0700 Subject: [PATCH] parser: add option for case-insensitive scalar comparison This does not support unicode case folding, which is very much a sorry-not-sorry situation because unicode is a disgusting labyrinthine chaotic hellformat. Actually, our unicode support isn't very good from the standpoint that we don't do any form of normalization, so specifying non-ASCII values for scalar comparisons is probably asking for trouble. --- src/parser.zig | 25 ++++++++++++++++++++----- src/parser/value.zig | 24 ++++++++++++++++++------ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/parser.zig b/src/parser.zig index 1ab5028..3222bff 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -64,15 +64,30 @@ pub const Options = struct { coerce_strings: bool = false, // Only used by the parseTo family of functions. - // Two lists of strings. Truthy strings will be parsed to boolean true. Falsy - // strings will be parsed to boolean false. All other strings will raise an - // error. - boolean_strings: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{ + // Two lists of strings. Scalars in a document that match any of the truthy values + // will be parsed to boolean true. Scalars in the document that match any of the + // falsy values will be parsed to boolean false. All other scalar values will raise + // an error if the destination is a boolean type. By default, these comparisons are + // case-sensitive. See the `case_insensitive_scalar_coersion` option to change + // this. + boolean_scalars: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{ .truthy = &.{ "true", "True", "yes", "on" }, .falsy = &.{ "false", "False", "no", "off" }, }, - null_strings: []const []const u8 = &.{ "null", "nil", "None" }, + // Only used by the parseTo family of functions. + // A list of strings.
Scalars in the document that match any of the values listed + // will be parsed to optional `null`. Any other scalar value will be parsed as the + // optional child type if the destination type is an optional. By default, these + // comparisons are case-sensitive. See the `case_insensitive_scalar_coersion` + // option to change this. + null_scalars: []const []const u8 = &.{ "null", "nil", "None" }, + + // Only used by the parseTo family of functions. + // Perform ASCII-case-insensitive comparisons for scalars (i.e. `TRUE` in a document + // will match `true` in the boolean scalars). Unicode case folding is not currently + // supported. + case_insensitive_scalar_coersion: bool = false, // Only used by the parseTo family of functions. // If true, document scalars that appear to be numbers will attempt to convert into diff --git a/src/parser/value.zig b/src/parser/value.zig index c9e09d8..ecafd21 100644 --- a/src/parser/value.zig +++ b/src/parser/value.zig @@ -66,10 +66,17 @@ pub const Value = union(enum) { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return error.BadValue; - for (options.boolean_strings.truthy) |check| - if (std.mem.eql(u8, str, check)) return true; - for (options.boolean_strings.falsy) |check| - if (std.mem.eql(u8, str, check)) return false; + if (options.case_insensitive_scalar_coersion) { + for (options.boolean_scalars.truthy) |check| + if (std.ascii.eqlIgnoreCase(str, check)) return true; + for (options.boolean_scalars.falsy) |check| + if (std.ascii.eqlIgnoreCase(str, check)) return false; + } else { + for (options.boolean_scalars.truthy) |check| + if (std.mem.eql(u8, str, check)) return true; + for (options.boolean_scalars.falsy) |check| + if (std.mem.eql(u8, str, check)) return false; + } return error.BadValue; }, @@ -252,8 +259,13 @@ pub const Value = union(enum) { switch (self) { inline .scalar, .string => |str, tag| { if (tag == .string and !options.coerce_strings) return
error.BadValue; - for (options.null_strings) |check| - if (std.mem.eql(u8, str, check)) return null; + if (options.case_insensitive_scalar_coersion) { + for (options.null_scalars) |check| + if (std.ascii.eqlIgnoreCase(str, check)) return null; + } else { + for (options.null_scalars) |check| + if (std.mem.eql(u8, str, check)) return null; + } return try self.convertTo(opt.child, allocator, options); },