parser: add option for case-insensitive scalar comparison

This does not support unicode case folding, which is very much a
sorry-not-sorry situation because unicode is a disgusting labyrinthine
chaotic hellformat. Actually, our unicode support isn't very good from
the standpoint that we don't do any form of normalization, so
specifying non-ASCII values for scalar comparisons is probably asking
for trouble.
This commit is contained in:
torque 2023-10-18 21:34:07 -07:00
parent 1f75ff6b8a
commit 4690f0b808
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
2 changed files with 38 additions and 11 deletions

View File

@ -64,15 +64,30 @@ pub const Options = struct {
coerce_strings: bool = false, coerce_strings: bool = false,
// Only used by the parseTo family of functions. // Only used by the parseTo family of functions.
// Two lists of strings. Truthy strings will be parsed to boolean true. Falsy // Two lists of strings. Scalars in a document that match any of the truthy values
// strings will be parsed to boolean false. All other strings will raise an // will be parsed to boolean true. Scalars in the document that match any of the
// error. // falsy values will be parsed to boolean false. All other scalar values will raise
boolean_strings: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{ // an error if the destination is a boolean type. By default, these comparisons are
// case-sensitive. See the `case_insensitive_scalar_coersion` option to change
// this.
boolean_scalars: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
.truthy = &.{ "true", "True", "yes", "on" }, .truthy = &.{ "true", "True", "yes", "on" },
.falsy = &.{ "false", "False", "no", "off" }, .falsy = &.{ "false", "False", "no", "off" },
}, },
null_strings: []const []const u8 = &.{ "null", "nil", "None" }, // Only used by the parseTo family of functions.
// A list of strings. Scalars in the document that match any of the values listed
// will be parsed to optional `null`. Any other scalar value will be parsed as the
// optional child type if the destination type is an optional. By default, these
// comparisons are case-sensitive. See the `case_insensitive_scalar_coersion`
// option to change this.
null_scalars: []const []const u8 = &.{ "null", "nil", "None" },
// Only used by the parseTo family of functions.
// Perform ASCII-case-insensitive comparisons for scalars (e.g. `TRUE` in a document
// will match `true` in the boolean scalars). Unicode case folding is not currently
// supported.
case_insensitive_scalar_coersion: bool = false,
// Only used by the parseTo family of functions. // Only used by the parseTo family of functions.
// If true, document scalars that appear to be numbers will attempt to convert into // If true, document scalars that appear to be numbers will attempt to convert into

View File

@ -66,10 +66,17 @@ pub const Value = union(enum) {
switch (self) { switch (self) {
inline .scalar, .string => |str, tag| { inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue; if (tag == .string and !options.coerce_strings) return error.BadValue;
for (options.boolean_strings.truthy) |check| if (options.case_insensitive_scalar_coersion) {
if (std.mem.eql(u8, str, check)) return true; for (options.boolean_strings.truthy) |check|
for (options.boolean_strings.falsy) |check| if (std.ascii.eqlIgnoreCase(str, check)) return true;
if (std.mem.eql(u8, str, check)) return false; for (options.boolean_strings.falsy) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return false;
} else {
for (options.boolean_strings.truthy) |check|
if (std.mem.eql(u8, str, check)) return true;
for (options.boolean_strings.falsy) |check|
if (std.mem.eql(u8, str, check)) return false;
}
return error.BadValue; return error.BadValue;
}, },
@ -252,8 +259,13 @@ pub const Value = union(enum) {
switch (self) { switch (self) {
inline .scalar, .string => |str, tag| { inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue; if (tag == .string and !options.coerce_strings) return error.BadValue;
for (options.null_strings) |check| if (options.case_insensitive_scalar_coersion) {
if (std.mem.eql(u8, str, check)) return null; for (options.null_strings) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return null;
} else {
for (options.null_strings) |check|
if (std.mem.eql(u8, str, check)) return null;
}
return try self.convertTo(opt.child, allocator, options); return try self.convertTo(opt.child, allocator, options);
}, },