grammar, spelling

parser.value.convertTo: add field converter concept
It is convenient to be able to have custom logic for a specific field on a given struct without having to write a function to manually reify the whole thing from scratch.
2024-06-18 18:33:57 -07:00 · 2024-06-18 18:32:22 -07:00 · 2024-06-18 18:32:22 -07:00 · 2024-06-18 18:24:19 -07:00 · 2024-01-15 22:10:15 -08:00 · 2023-12-01 22:35:18 -08:00
8 changed files with 203 additions and 49 deletions
--- a/build.zig
+++ b/build.zig
@@ -2,11 +2,26 @@ const std = @import("std");

 pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});

    const nice = b.addModule("nice", .{
-        .source_file = .{ .path = "src/nice.zig" },
+        .root_source_file = b.path("src/nice.zig"),
    });

+    const tests = b.addTest(.{
+        .name = "nice-unit-tests",
+        .root_source_file = b.path("tests/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    tests.root_module.addImport("nice", nice);
+
+    const run_main_tests = b.addRunArtifact(tests);
+    const test_step = b.step("test", "Run tests");
+    test_step.dependOn(&b.addInstallArtifact(tests, .{}).step);
+    test_step.dependOn(&run_main_tests.step);
+
    add_examples(b, .{
        .target = target,
        .nice_mod = nice,
@@ -14,7 +29,7 @@ pub fn build(b: *std.Build) void {
 }

 const ExampleOptions = struct {
-    target: std.zig.CrossTarget,
+    target: std.Build.ResolvedTarget,
    nice_mod: *std.Build.Module,
 };

@@ -29,18 +44,18 @@ const examples = [_]Example{
    .{ .name = "reify", .file = "examples/reify.zig" },
 };

-pub fn add_examples(b: *std.build, options: ExampleOptions) void {
+pub fn add_examples(b: *std.Build, options: ExampleOptions) void {
    const example_step = b.step("examples", "build examples");

    inline for (examples) |example| {
        const ex_exe = b.addExecutable(.{
            .name = example.name,
-            .root_source_file = .{ .path = example.file },
+            .root_source_file = b.path(example.file),
            .target = options.target,
            .optimize = .Debug,
        });

-        ex_exe.addModule("nice", options.nice_mod);
+        ex_exe.root_module.addImport("nice", options.nice_mod);
        const install = b.addInstallArtifact(ex_exe, .{});
        example_step.dependOn(&install.step);
    }
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,5 +1,5 @@
 .{
-    .name = "nice-data",
+    .name = "nice",
    .version = "0.1.0-pre",
    .dependencies = .{},
    .paths = .{
--- a/readme.md
+++ b/readme.md
@@ -267,7 +267,6 @@ nests:
 }
 ```

-
 ## Restrictions

 Nice documents must be encoded in valid UTF-8 with no BOM. They must use `LF`-only newlines (`CR` characters are forbidden). Tabs and spaces cannot be mixed for indentation. Indentation *must* adhere to a consistent quantum throughout the whole document, including on comment lines. Nonprinting ASCII characters are forbidden (specifically, any character less than `0x20` (space) except for `0x09` (horizontal tab) and `0x0A` (newline)). Trailing whitespace, including lines consisting only of whitespace, is forbidden, although empty lines are permitted. Some keys and values cannot be represented (for example, map keys cannot start with the character `#`, though map values can).
@@ -286,7 +285,7 @@ Nice is not, and does not try to be, a general-purpose data serialization format

 ### There's No Need to Conquer the World

-Nice has no exhaustive specification or formal grammar. The parser is handwritten, and there are pretty much guaranteed to be some strange edge cases that weren't considered when writing it. Standardization is a good thing, generally speaking, but it's not a goal here. Perhaps this driven by the author's indolence more than deep philosophical zealotry. On the other hand, this paragraph is under the philosophy section.
+Nice has no exhaustive specification or formal grammar. The parser is handwritten, and there are pretty much guaranteed to be some strange edge cases that weren't considered when writing it. Standardization is a good thing, generally speaking, but it's not a goal here. Perhaps this is driven by the author's indolence more than deep philosophical zealotry. On the other hand, this paragraph is under the philosophy section.

 # The Implementation

--- a/src/parser.zig
+++ b/src/parser.zig
@@ -50,7 +50,7 @@ pub const Options = struct {
    // If an empty document is parsed, this defines what value type should be the
    // resulting document root object. The default behavior is to emit an error if the
    // document is empty.
-    default_object: enum { string, list, map, fail } = .fail,
+    default_object: enum { scalar, list, map, fail } = .fail,

    // Only used by the parseTo family of functions.
    // If false, and a mapping contains additional keys that do not map to the fields of
@@ -80,13 +80,11 @@ pub const Options = struct {
    // an error if the destination is a boolean type. By default, these comparisons are
    // case-sensitive. See the `case_insensitive_scalar_coersion` option to change
    // this.
-    boolean_scalars: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
-        .truthy = &.{ "true", "True", "yes", "on" },
-        .falsy = &.{ "false", "False", "no", "off" },
-    },
+    truthy_boolean_scalars: []const []const u8 = &.{ "true", "True", "yes", "on" },
+    falsy_boolean_scalars: []const []const u8 = &.{ "false", "False", "no", "off" },

    // Only used by the parseTo family of functions.
-    // A list of strings. Scalars in the doucment that match any of the values listed
+    // A list of strings. Scalars in the document that match any of the values listed
    // will be parsed to optional `null`. Any other scalar value will be parsed as the
    // optional child type if the destination type is an optional. By default, these
    // comparisons are case-sensitive. See the `case_insensitive_scalar_coersion`
--- a/src/parser/state.zig
+++ b/src/parser/state.zig
@@ -59,7 +59,7 @@ pub const State = struct {

        switch (state.mode) {
            .initial => switch (options.default_object) {
-                .string => state.document.root = Value.emptyString(),
+                .scalar => state.document.root = Value.emptyScalar(),
                .list => state.document.root = Value.newList(arena_alloc),
                .map => state.document.root = Value.newMap(arena_alloc),
                .fail => {
--- a/src/parser/value.zig
+++ b/src/parser/value.zig
@@ -68,6 +68,10 @@ pub const Value = union(enum) {
    map: Map,
    inline_map: Map,

+    pub fn FieldConverter(comptime T: type) type {
+        return *const fn (Value, std.mem.Allocator, Options) error{BadValue}!T;
+    }
+
    pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
        switch (@typeInfo(T)) {
            .Void => {
@@ -82,14 +86,14 @@ pub const Value = union(enum) {
                    inline .scalar, .string => |str, tag| {
                        if (tag == .string and !options.coerce_strings) return error.BadValue;
                        if (options.case_insensitive_scalar_coersion) {
-                            for (options.boolean_scalars.truthy) |check|
+                            for (options.truthy_boolean_scalars) |check|
                                if (std.ascii.eqlIgnoreCase(str, check)) return true;
-                            for (options.boolean_scalars.falsy) |check|
+                            for (options.falsy_boolean_scalars) |check|
                                if (std.ascii.eqlIgnoreCase(str, check)) return false;
                        } else {
-                            for (options.boolean_scalars.truthy) |check|
+                            for (options.truthy_boolean_scalars) |check|
                                if (std.mem.eql(u8, str, check)) return true;
-                            for (options.boolean_scalars.falsy) |check|
+                            for (options.falsy_boolean_scalars) |check|
                                if (std.mem.eql(u8, str, check)) return false;
                        }

@@ -209,39 +213,28 @@ pub const Value = union(enum) {
                    .map, .inline_map => |map| {
                        var result: T = undefined;

-                        if (options.ignore_extra_fields) {
-                            inline for (stt.fields) |field| {
-                                if (map.get(field.name)) |value| {
-                                    @field(result, field.name) = try value.convertTo(field.type, allocator, options);
-                                } else if (options.allow_omitting_default_values) {
-                                    if (comptime field.default_value) |def|
-                                        @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
-                                    else
-                                        return error.BadValue;
+                        if (!options.ignore_extra_fields and (map.count() > stt.fields.len))
+                            return error.BadValue;
+
+                        var use_count: usize = 0;
+                        inline for (stt.fields) |field| {
+                            if (map.get(field.name)) |val| {
+                                if (comptime hasFn(T, "niceFieldConverter") and T.niceFieldConverter(field.name) != null) {
+                                    @field(result, field.name) = try T.niceFieldConverter(field.name).?(val, allocator, options);
                                } else {
-                                    return error.BadValue;
+                                    @field(result, field.name) = try val.convertTo(field.type, allocator, options);
                                }
-                            }
-                        } else {
-                            // TODO: consider not cloning the map here. This would
-                            //       result in the requirement that the raw value object
-                            //       not be used after it has been converted to a type,
-                            //       based on the parse options.
-                            var clone = try map.clone();
-                            defer clone.deinit();
-                            inline for (stt.fields) |field| {
-                                if (clone.fetchSwapRemove(field.name)) |kv| {
-                                    @field(result, field.name) = try kv.value.convertTo(field.type, allocator, options);
-                                } else if (options.allow_omitting_default_values) {
-                                    if (comptime field.default_value) |def|
-                                        @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
-                                    else
-                                        return error.BadValue;
-                                } else return error.BadValue;
-                            }
-                            // there were extra fields in the data
-                            if (clone.count() > 0) return error.BadValue;
+                                use_count += 1;
+                            } else if (options.allow_omitting_default_values) {
+                                if (comptime field.default_value) |def|
+                                    @field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
+                                else
+                                    return error.BadValue;
+                            } else return error.BadValue;
                        }
+                        // there were extra fields in the data
+                        if (!options.ignore_extra_fields and (map.count() > use_count))
+                            return error.BadValue;

                        return result;
                    },
--- a/tests/main.zig
+++ b/tests/main.zig
@@ -0,0 +1,5 @@
+comptime {
+    if (@import("builtin").is_test) {
+        _ = @import("./reify.zig");
+    }
+}
--- a/tests/reify.zig
+++ b/tests/reify.zig
@@ -0,0 +1,144 @@
+const std = @import("std");
+
+const nice = @import("nice");
+
+fn reifyScalar(comptime scalar: []const u8, expected: anytype) !void {
+    try reifyScalarWithOptions(scalar, expected, .{});
+}
+
+fn reifyScalarWithOptions(comptime scalar: []const u8, expected: anytype, options: nice.parser.Options) !void {
+    const allocator = std.testing.allocator;
+    var diagnostics = nice.Diagnostics{};
+    const parsed = try nice.parseBufferTo(
+        @TypeOf(expected),
+        allocator,
+        scalar ++ "\n",
+        &diagnostics,
+        options,
+    );
+    defer parsed.deinit();
+
+    try std.testing.expectEqual(expected, parsed.value);
+}
+
+test "reify integer" {
+    try reifyScalar("123", @as(u8, 123));
+    try reifyScalar("0123", @as(u8, 123));
+    try reifyScalar("1_23", @as(u8, 123));
+    try reifyScalar("-01_23", @as(i8, -123));
+}
+
+test "reify hexadecimal" {
+    try reifyScalar("0x123", @as(i64, 0x123));
+    try reifyScalar("0x0123", @as(i64, 0x123));
+    try reifyScalar("0x01_23", @as(i64, 0x123));
+    try reifyScalar("-0x01_23", @as(i64, -0x123));
+}
+
+test "reify octal" {
+    try reifyScalar("0o123", @as(i64, 0o123));
+    try reifyScalar("0o0123", @as(i64, 0o123));
+    try reifyScalar("0o01_23", @as(i64, 0o123));
+    try reifyScalar("-0o01_23", @as(i64, -0o123));
+}
+
+test "reify binary" {
+    try reifyScalar("0b1011", @as(i5, 0b1011));
+    try reifyScalar("0b01011", @as(i5, 0b1011));
+    try reifyScalar("0b010_11", @as(i5, 0b1011));
+    try reifyScalar("-0b010_11", @as(i5, -0b1011));
+}
+
+test "reify float" {
+    try reifyScalar("0.25", @as(f32, 0.25));
+    try reifyScalar("0.2_5", @as(f32, 0.25));
+    try reifyScalar("00.250", @as(f32, 0.25));
+    try reifyScalar("-0.25", @as(f32, -0.25));
+}
+
+test "reify hexfloat" {
+    try reifyScalar("0x0.25", @as(f64, 0x0.25));
+    try reifyScalar("0x0.2_5", @as(f64, 0x0.25));
+    try reifyScalar("0x0.250p1", @as(f64, 0x0.25p1));
+    try reifyScalar("-0x0.25", @as(f64, -0x0.25));
+}
+
+test "reify true" {
+    try reifyScalar("true", true);
+    try reifyScalar("True", true);
+    try reifyScalar("yes", true);
+    try reifyScalar("on", true);
+}
+
+test "reify false" {
+    try reifyScalar("false", false);
+    try reifyScalar("False", false);
+    try reifyScalar("no", false);
+    try reifyScalar("off", false);
+}
+
+test "reify custom true" {
+    const options = nice.parser.Options{ .truthy_boolean_scalars = &.{"correct"} };
+    try reifyScalarWithOptions("correct", true, options);
+}
+
+test "reify true case insensitive" {
+    try std.testing.expectError(error.BadValue, reifyScalar("TRUE", true));
+    const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
+    try reifyScalarWithOptions("TRUE", true, options);
+}
+
+test "reify custom false" {
+    const options = nice.parser.Options{ .falsy_boolean_scalars = &.{"incorrect"} };
+    try reifyScalarWithOptions("incorrect", false, options);
+}
+
+test "reify false case insensitive" {
+    try std.testing.expectError(error.BadValue, reifyScalar("FALSE", false));
+    const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
+    try reifyScalarWithOptions("FALSE", false, options);
+}
+
+test "reify null" {
+    try reifyScalar("null", @as(?u8, null));
+    try reifyScalar("nil", @as(?u8, null));
+    try reifyScalar("None", @as(?u8, null));
+}
+
+test "reify custom null" {
+    const options = nice.parser.Options{ .null_scalars = &.{"nothing"} };
+    try reifyScalarWithOptions("nothing", @as(?u8, null), options);
+}
+
+test "reify null case insensitive" {
+    // this is a little weird because when the null string mismatches, it will try to
+    // parse the child optional type and produce either a value or an error from that,
+    // so the error received depends on whether or not the optional child type fails to
+    // parse the given value.
+    try std.testing.expectError(error.InvalidCharacter, reifyScalar("NULL", @as(?u8, null)));
+    const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
+    try reifyScalarWithOptions("NULL", @as(?u8, null), options);
+}
+
+test "reify void" {
+    // A void scalar cannot exist on its own as it is not distinguishable from an empty
+    // document.
+    const Void = struct { void: void };
+    try reifyScalar("void:", Void{ .void = void{} });
+}
+
+test "reify void scalar" {
+    const options = nice.parser.Options{ .default_object = .scalar };
+    try reifyScalarWithOptions("", void{}, options);
+}
+
+test "reify enum" {
+    const Enum = enum { one, two };
+    try reifyScalar(".one", Enum.one);
+}
+
+test "reify enum no dot" {
+    const options = nice.parser.Options{ .expect_enum_dot = false };
+    const Enum = enum { one, two };
+    try reifyScalarWithOptions("two", Enum.two, options);
+}
Author	SHA1	Message	Date
torque	e562e30e5e	grammar, spelling	2024-06-18 18:33:57 -07:00
torque	8aaceba484	parser.value.convertTo: add field converter concept It is convenient to be able to have custom logic for a specific field on a given struct without having to write a function to manually reify the whole thing from scratch.	2024-06-18 18:32:22 -07:00
torque	c74d615131	parser.value.convertTo: simplify struct field usage This avoids having to clone the map while maintaining the same conversion strictness.	2024-06-18 18:32:22 -07:00
torque	8ccb2c3a66	build: update for zig-0.13	2024-06-18 18:24:19 -07:00
torque	ad73ea6508	build: update for 0.12.0-dev.2208+4debd4338 I am hoping that by starting to roll over to zig 0.12 now it will be easier to migrate when the release actually happens. Unfortunately, the build system API changed fairly significantly and supporting both 0.11 and 0.12-dev is not very interesting.	2024-01-15 22:10:15 -08:00
torque	875b1b6344	start adding tests	2023-12-01 22:35:18 -08:00
torque	ea52c99fee	parser.Options: split truthy/falsy scalars into separate fields This makes overriding the defaults of just one of truthy or falsy more ergonomic. Previously, when overriding the truthy scalars, the user would also have to specify all of the falsy scalars as well.	2023-12-01 22:33:14 -08:00
torque	dbf2762982	parser: empty document should be scalar, not string I think I originally set this up before I had fully decided on the semantics of scalars vs strings. This option makes much more sense to me because it mirrors the empty value behavior of map keys. Without an introducer sequence, it can't be a string.	2023-12-01 22:31:30 -08:00