parser: add option for case-insensitive scalar comparison

This does not support unicode case folding, which is very much a
sorry-not-sorry situation because unicode is a disgusting labyrinthine
chaotic hellformat. Actually, our unicode support isn't very good from
the standpoint that we don't do any form of normalization, so
specifying non-ASCII values for scalar comparisons is probably asking
for trouble.
This commit is contained in:
torque 2023-10-18 21:34:07 -07:00
parent 1f75ff6b8a
commit 4690f0b808
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
2 changed files with 38 additions and 11 deletions

View File

@ -64,15 +64,30 @@ pub const Options = struct {
coerce_strings: bool = false,
// Only used by the parseTo family of functions.
// Two lists of strings. Truthy strings will be parsed to boolean true. Falsy
// strings will be parsed to boolean false. All other strings will raise an
// error.
boolean_strings: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
// Two lists of strings. Scalars in a document that match any of the truthy values
// will be parsed to boolean true. Scalars in the document that match any of the
// falsy values will be parsed to boolean false. All other scalar values will raise
// an error if the destination is a boolean type. By default, these comparisons are
// case-sensitive. See the `case_insensitive_scalar_coersion` option to change
// this.
boolean_scalars: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
.truthy = &.{ "true", "True", "yes", "on" },
.falsy = &.{ "false", "False", "no", "off" },
},
null_strings: []const []const u8 = &.{ "null", "nil", "None" },
// Only used by the parseTo family of functions.
// A list of strings. Scalars in the document that match any of the values listed
// will be parsed to optional `null`. Any other scalar value will be parsed as the
// optional child type if the destination type is an optional. By default, these
// comparisons are case-sensitive. See the `case_insensitive_scalar_coersion`
// option to change this.
null_scalars: []const []const u8 = &.{ "null", "nil", "None" },
// Only used by the parseTo family of functions.
// Perform ASCII-case-insensitive comparisons for scalars (e.g. `TRUE` in a document
// will match `true` in the boolean scalars). Unicode case folding is not currently
// supported.
case_insensitive_scalar_coersion: bool = false,
// Only used by the parseTo family of functions.
// If true, document scalars that appear to be numbers will attempt to convert into

View File

@ -66,10 +66,17 @@ pub const Value = union(enum) {
switch (self) {
inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue;
for (options.boolean_strings.truthy) |check|
if (std.mem.eql(u8, str, check)) return true;
for (options.boolean_strings.falsy) |check|
if (std.mem.eql(u8, str, check)) return false;
if (options.case_insensitive_scalar_coersion) {
for (options.boolean_strings.truthy) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return true;
for (options.boolean_strings.falsy) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return false;
} else {
for (options.boolean_strings.truthy) |check|
if (std.mem.eql(u8, str, check)) return true;
for (options.boolean_strings.falsy) |check|
if (std.mem.eql(u8, str, check)) return false;
}
return error.BadValue;
},
@ -252,8 +259,13 @@ pub const Value = union(enum) {
switch (self) {
inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue;
for (options.null_strings) |check|
if (std.mem.eql(u8, str, check)) return null;
if (options.case_insensitive_scalar_coersion) {
for (options.null_strings) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return null;
} else {
for (options.null_strings) |check|
if (std.mem.eql(u8, str, check)) return null;
}
return try self.convertTo(opt.child, allocator, options);
},