Compare commits: 1c5d7af552 ... master
21 commits: e562e30e5e, 8aaceba484, c74d615131, 8ccb2c3a66, ad73ea6508, 875b1b6344, ea52c99fee, dbf2762982, 0f4a9fcaa7, bd079b42d9, bd0d74ee6a, 2208079355, 98eac68929, 39619e7d6b, 33ab092a06, 21a9753d46, e8ddee5ab2, 2f90ccba6f, d6e1e85ea1, ed913ab3a3, 73575a43a7
build.zig — 25 changed lines

@@ -2,11 +2,26 @@ const std = @import("std");

pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});

const nice = b.addModule("nice", .{
.source_file = .{ .path = "src/nice.zig" },
.root_source_file = b.path("src/nice.zig"),
});

const tests = b.addTest(.{
.name = "nice-unit-tests",
.root_source_file = b.path("tests/main.zig"),
.target = target,
.optimize = optimize,
});

tests.root_module.addImport("nice", nice);

const run_main_tests = b.addRunArtifact(tests);
const test_step = b.step("test", "Run tests");
test_step.dependOn(&b.addInstallArtifact(tests, .{}).step);
test_step.dependOn(&run_main_tests.step);

add_examples(b, .{
.target = target,
.nice_mod = nice,

@@ -14,7 +29,7 @@ pub fn build(b: *std.Build) void {
}

const ExampleOptions = struct {
target: std.zig.CrossTarget,
target: std.Build.ResolvedTarget,
nice_mod: *std.Build.Module,
};

@@ -29,18 +44,18 @@ const examples = [_]Example{
.{ .name = "reify", .file = "examples/reify.zig" },
};

pub fn add_examples(b: *std.build, options: ExampleOptions) void {
pub fn add_examples(b: *std.Build, options: ExampleOptions) void {
const example_step = b.step("examples", "build examples");

inline for (examples) |example| {
const ex_exe = b.addExecutable(.{
.name = example.name,
.root_source_file = .{ .path = example.file },
.root_source_file = b.path(example.file),
.target = options.target,
.optimize = .Debug,
});

ex_exe.addModule("nice", options.nice_mod);
ex_exe.root_module.addImport("nice", options.nice_mod);
const install = b.addInstallArtifact(ex_exe, .{});
example_step.dependOn(&install.step);
}
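For downstream users, the same API shift applies when importing the module. Below is a minimal sketch of a consumer's `build.zig` under the newer `std.Build` API; the dependency name `nice` and the `demo` executable are illustrative assumptions, not part of this change:

```zig
const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Assumes `nice` is declared as a dependency in the consumer's build.zig.zon.
    const nice_dep = b.dependency("nice", .{ .target = target, .optimize = optimize });

    const exe = b.addExecutable(.{
        .name = "demo",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    // Same wiring the diff switches to: addImport on the root module.
    exe.root_module.addImport("nice", nice_dep.module("nice"));

    b.installArtifact(exe);
}
```

With the steps added above, `zig build test` runs the new unit tests and `zig build examples` builds the bundled example programs.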
build.zig.zon

@@ -1,5 +1,12 @@
.{
.name = "nice-data",
.name = "nice",
.version = "0.1.0-pre",
.dependencies = .{},
.paths = .{
"src",
"build.zig",
"build.zig.zon",
"license",
"readme.md",
},
}
@@ -5,18 +5,23 @@ const std = @import("std");

const nice = @import("nice");

const Enum = enum { first, second, third };
const TagUnion = union(Enum) { first: []const u8, second: i32, third: void };

const Example = struct {
useful: bool,
number: i32,
string: []const u8,
longstring: [:0]const u8,
tuple: struct { bool, i8 },
enume: enum { first, second, third },
taggart: union(enum) { first: []const u8, second: i32 },
enume: Enum,
taggart: TagUnion,
voidtag: TagUnion,
exist: ?bool,
again: ?bool,
array: [5]i16,
nested: [3]struct { index: usize, title: []const u8 },
default: u64 = 0xDEADCAFE,
};

const source =

@@ -32,9 +37,9 @@ const source =
\\ # and a trailing newline for good measure
\\ >
\\tuple: [ no, 127 ]
\\enume: .third
\\taggart:
\\ first: string a thing
\\enume: .second
\\taggart: {.first: string a thing}
\\voidtag: .third
\\list:
\\ - I am a list item
\\exist: null

@@ -69,11 +74,18 @@ pub fn main() !void {
std.debug.print(" string: {s}\n", .{loaded.value.string});
std.debug.print(" longstring: {s}\n", .{loaded.value.longstring});
std.debug.print(" tuple: {{ {}, {d} }}\n", .{ loaded.value.tuple[0], loaded.value.tuple[1] });
std.debug.print(" enume: {s}\n", .{@tagName(loaded.value.enume)});
std.debug.print(" enume: .{s}\n", .{@tagName(loaded.value.enume)});
std.debug.print(" taggart: ", .{});
switch (loaded.value.taggart) {
.first => |val| std.debug.print(".first = {s}\n", .{val}),
.second => |val| std.debug.print(".second = {d}\n", .{val}),
.third => std.debug.print(".third\n", .{}),
}
std.debug.print(" voidtag: ", .{});
switch (loaded.value.voidtag) {
.first => |val| std.debug.print(".first = {s}\n", .{val}),
.second => |val| std.debug.print(".second = {d}\n", .{val}),
.third => std.debug.print(".third\n", .{}),
}
std.debug.print(" exist: {?}\n", .{loaded.value.exist});
std.debug.print(" again: {?}\n", .{loaded.value.again});

@@ -87,5 +99,6 @@ pub fn main() !void {
std.debug.print(" {{ index: {d}, title: {s} }}\n", .{ item.index, item.title });
}
std.debug.print(" ]\n", .{});
std.debug.print(" default: 0x{X}\n", .{loaded.value.default});
std.debug.print("}}\n", .{});
}
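The hunks above only touch the field definitions and the printing code; the loading call itself is unchanged and not shown. Judging from the `parseBufferTo` entry point exercised by the new test suite at the end of this comparison, obtaining `loaded` looks roughly like the following sketch, assuming the `Example` struct and `source` document defined in this example file (the test name and use of `std.testing.allocator` are illustrative):

```zig
const std = @import("std");
const nice = @import("nice");

test "load the Example document (sketch)" {
    const allocator = std.testing.allocator;
    var diagnostics = nice.Diagnostics{};
    // Signature as used by tests/reify.zig: parseBufferTo(T, allocator, buffer, &diagnostics, options).
    const loaded = try nice.parseBufferTo(Example, allocator, source, &diagnostics, .{});
    defer loaded.deinit();
    // loaded.value is a fully populated Example, as the prints above demonstrate.
    _ = loaded.value;
}
```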
readme.md — 19 changed lines

@@ -47,7 +47,7 @@ A scalar value is a sequence of valid UTF-8 codepoints. Scalars cannot contain l

A string value is very similar to a scalar value, except that it is started by a leader character sequence and ended with trailer character sequence. Strings may be spread across multiple lines (here, we call each line a string fragment), and each fragment must start with a leader and end with the trailer. Strings fragments respect leading whitespace (after the leader sequence), unlike scalars. The trailer may be used to include trailing whitespace in a fragment. Comments may be interspersed between the fragments that compose a string (demonstrated below).

The string leader sequence consists of an ASCII character followed by a single ASCII space. The space must be omitted if the fragment contains no other characters (because otherwise it would be trailing whitespace, which is forbidden). The leader sequence defines how the lines of the string are concatenated together, as follows:
The string leader sequence consists of an ASCII character followed by a single ASCII space. The space must be omitted if the fragment contains no other characters (because otherwise it would be trailing whitespace, which is forbidden). The leader sequence defines how the fragments of the string are concatenated together, as follows:

- `| ` specifies that this fragment of the string should be directly concatenated onto the previous fragment.
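To make the leader semantics concrete, here is a small illustration constructed from the description above (not taken from the readme itself): a value assembled from two fragments with a comment interspersed.

```nice
greeting:
  | Hello
  # comments may sit between the fragments of a string
  | , world
```

Because `| ` concatenates each fragment directly onto the previous one, `greeting` ends up holding `Hello, world`.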
@@ -119,7 +119,7 @@ parses to the following JSON structure:
["a list", "containing", "", "several values"]
```

There are a couple of new concepts here. The first new concept is demonstrated in the second value, which is an inline string. This is a standard string fragment that appears on the same line after another introducer (either a list item introducer, as in this example, or a map key introducer, which will be demonstrated in the section describing maps). The only difference between an inline string and a normal string as discussed above is that the inline string may is composed of only a single fragment (meaning it cannot be spread across multiple lines). The string leader used has no effect on an inline string, since the leader is not applied.
There are a couple of new concepts here. The first new concept is demonstrated in the second value, which is an inline string. This is a standard string fragment that appears on the same line after another introducer (either a list item introducer, as in this example, or a map key introducer, which will be demonstrated in the section describing maps). The only difference between an inline string and a normal string as discussed above is that the inline string is composed of only a single fragment (meaning it cannot be spread across multiple lines). The string leader used has no effect on an inline string, since the leader is not applied.

The other new concept is structural indentation. The fourth list item contains an indented string following a list item introducer that does not contain an inline value. Because the string sequence is indented, it belongs to the list item introduced immediately before it. Note that an indented sequence following an introducer that contains an inline value is a syntactic error. That is, the following document **cannot** be parsed:

@@ -167,7 +167,7 @@ The Nice document is similar in layout to its indented JSON counterpart but cont

Inline lists allow a list to be specified in a more concise form on a line following another item introducer (either a list item introducer or a map item introducer). They consist of a comma-separated sequence of scalars within a pair of square brackets (`[` and `]`). Inline lists may also contain other inline lists and inline maps (discussed later), but they cannot contain strings. Whitespace before and after values in an inline list is ignored, though whitespace within a value is preserved. Inline list values may not contain commas. For reasons related to intellectual bankruptcy, `[]` and `[ ]` are distinct values, just as they are in NestedText. `[]` represents an empty list, while `[ ]` represents a list containing a single empty string. As is hopefully suggested by the name, an inline list *must* be specified on a single line.

Inline lists are provided for when a document may benefit to having horizontal layout rather than vertical. It can also be used tactically to improve readability in other ways, but should not, in general, be preferred over standard lists. Here's the previous example, with a bit less indentation thanks to use of inline lists:
Inline lists are provided for when some parts of a document may benefit from having horizontal layout rather than vertical layout. It can also be used tactically to improve readability in other ways, but should not, in general, be preferred over standard lists. Here's the previous example, with a bit less indentation thanks to use of inline lists:

```nice
- start the parent

@@ -232,7 +232,9 @@ This maps to the following JSON structure:
}
```

Serialized maps are inherently ordered, but the data structures they represent do not necessarily preserve order. Nice preserves the order of the map keys as they were encountered in the document. ASCII spaces following the key scalar will be ignored, allowing adjacent values to be justified. The key scalar itself may not contain trailing or leading whitespace. A line only ever contains a single key scalar, unlike YAML. Maps must be nested using structural indentation.
Serialized maps are inherently ordered, but the data structures they represent do not necessarily preserve order. Nice guarantees that the order of the map keys, as they were encountered in the document, is preserved. Serialized maps can also represent multiple entries that have the same key. This is not generally useful (if you need to have multiple values for a given key, its corresponding value should be a list) and cannot typically be represented by a map data structure. The Nice parser can be configured to produce a parse error when a duplicate key is encountered (the default behavior) or it can preserve either only first encountered duplicate value or only the last encountered duplicate value (in this case, the map order preserves the index of the last encountered duplicate, which may be less efficient if many duplicates exist, since it requires performing an ordered remove on the previously encountered instance).
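As an illustration of the three duplicate-key behaviors described in the revised paragraph (constructed for this comparison, not taken from the readme):

```nice
color: red
color: blue
```

Under the default configuration this document fails to parse with a duplicate-key error; keeping the first occurrence yields `red`, and keeping the last yields `blue`, with `color` moving to the position of the last occurrence in the preserved key order.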
ASCII spaces following the key scalar will be ignored, allowing adjacent values to be justified. The key scalar itself may not contain trailing or leading whitespace. A line only ever contains a single key scalar, unlike YAML. Maps must be nested using structural indentation.

```nice
fully aligned: value: 1

@@ -248,7 +250,7 @@ values: value: 2

### Inline Maps

The final syntactic construct is the inline map which is, as its name hopefully suggests, the map equivalent of an inline list. An inline map is introduced by an opening curly brace `{` and closed by an opposing brace `}`. An inline map consists of a sequence of key-value pairs with the keys being separated from the values by the `:` character. An inline map may contain scalars, inline lists, and other inline maps as values, and all of its keys must be scalars. As with inline lists, whitespace surrounding values is ignored, and whitespace preceding keys is also ignored (there must be no whitespace between the key and its following `:`).
The final syntactic construct is the inline map, which is, as its name hopefully suggests, the map equivalent of an inline list. An inline map is introduced by an opening curly brace `{` and closed by an opposing brace `}`. An inline map consists of a sequence of key-value pairs with the keys being separated from the values by the `:` character. An inline map may contain scalars, inline lists, and other inline maps as values, and all of its keys must be scalars. As with inline lists, whitespace surrounding values is ignored, and whitespace preceding keys is also ignored (there must be no whitespace between the key and its following `:`).

```nice
an example: { this: is, an inline: map }

@@ -265,10 +267,9 @@ nests:
}
```

## Restrictions

Nice documents must be encoded in valid UTF-8. They must use `LF`-only newlines (`CR` characters are forbidden). Tabs and spaces cannot be mixed for indentation. Indentation *must* adhere to a consistent quantum throughout the whole document, including on comment lines. Nonprinting ASCII characters are forbidden (specifically, any character less than `0x20` (space) except for `0x09` (horizontal tab) and `0x0A` (newline)). Trailing whitespace, including lines consisting only of whitespace, is forbidden, although empty lines are permitted. Some keys and values cannot be represented (for example, map keys cannot start with the character `#`, though map values can).
Nice documents must be encoded in valid UTF-8 with no BOM. They must use `LF`-only newlines (`CR` characters are forbidden). Tabs and spaces cannot be mixed for indentation. Indentation *must* adhere to a consistent quantum throughout the whole document, including on comment lines. Nonprinting ASCII characters are forbidden (specifically, any character less than `0x20` (space) except for `0x09` (horizontal tab) and `0x0A` (newline)). Trailing whitespace, including lines consisting only of whitespace, is forbidden, although empty lines are permitted. Some keys and values cannot be represented (for example, map keys cannot start with the character `#`, though map values can).

## Philosophy

@@ -284,11 +285,11 @@ Nice is not, and does not try to be, a general-purpose data serialization format

### There's No Need to Conquer the World

Nice has no exhaustive specification or formal grammar. The parser is handwritten, and there are pretty much guaranteed to be some strange edge cases that weren't considered when writing it. Standardization is a good thing, generally speaking, but it's not a goal here. Perhaps this driven by the author's indolence more than deep philosophical zealotry. On the other hand, this paragraph is under the philosophy section.
Nice has no exhaustive specification or formal grammar. The parser is handwritten, and there are pretty much guaranteed to be some strange edge cases that weren't considered when writing it. Standardization is a good thing, generally speaking, but it's not a goal here. Perhaps this is driven by the author's indolence more than deep philosophical zealotry. On the other hand, this paragraph is under the philosophy section.

# The Implementation

The Reference™ Nice parser/deserializer is this Zig library. It contains a handwritten nonrecursive parser to a generic data structure (`nice.Value`, a tagged union that can represent a scalar, a string, a list of these generic values, or a map of scalars to these generic values). The included example scripts demonstrate how to use the API. See `examples/parse.zig` for one-shot parsing from a slice. `examples/stream.zig` demonstrates how to parse streaming data that does not require loading a whole document into memory at once. This is slower will generally have a lower peak memory usage (which is mainly driven by the size of the document).
The Reference™ Nice parser/deserializer is this Zig library. It contains a handwritten nonrecursive parser to a generic data structure (`nice.Value`, a tagged union that can represent a scalar, a string, a list of these generic values, or a map of scalars to these generic values). The included example scripts demonstrate how to use the API. See `examples/parse.zig` for one-shot parsing from a slice. `examples/stream.zig` demonstrates how to parse streaming data that does not require loading a whole document into memory at once. This is slower but will generally have a lower peak memory usage (though that is mainly driven by the size of the document).

`nice.Value` has a method to recursively be converted into a strongly typed user-defined structure. Zig's compile-time reflection is used to generate code to perform appropriate type conversion. There a variety of options which can be used to control specific details of the conversion, which are governed by `nice.parser.Options`. `examples/reify.zig` demonstrates basic use of this functionality.
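The conversion layer also honors a user-supplied `deserializeNice` hook (see the `hasFn` checks in the value-conversion diff further down), which lets a type take over its own conversion from the generic `nice.Value`. A rough sketch of such a hook follows; the `Distance` type and its `"<number> m"` format are hypothetical, and the signature is inferred from the call site `T.deserializeNice(self, allocator, options)` in that diff:

```zig
const std = @import("std");
const nice = @import("nice");

const Distance = struct {
    meters: f64,

    // Hypothetical custom conversion hook picked up by Value.convertTo.
    pub fn deserializeNice(value: nice.Value, allocator: std.mem.Allocator, options: nice.parser.Options) !Distance {
        _ = allocator;
        _ = options;
        // Accept either a scalar or a string value.
        const text: []const u8 = switch (value) {
            .scalar => |s| s,
            .string => |s| s,
            else => return error.BadValue,
        };
        // Accept values such as "12.5 m" by stripping the unit suffix.
        const number = std.mem.trimRight(u8, std.mem.trimRight(u8, text, "m"), " ");
        return .{ .meters = try std.fmt.parseFloat(f64, number) };
    }
};
```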
@@ -50,7 +50,7 @@ pub const Options = struct {
// If an empty document is parsed, this defines what value type should be the
// resulting document root object. The default behavior is to emit an error if the
// document is empty.
default_object: enum { string, list, map, fail } = .fail,
default_object: enum { scalar, list, map, fail } = .fail,

// Only used by the parseTo family of functions.
// If false, and a mapping contains additional keys that do not map to the fields of

@@ -61,11 +61,11 @@ pub const Options = struct {
ignore_extra_fields: bool = true,

// Only used by the parseTo family of functions.
// If true, if a struct field is an optional type and the corresponding mapping key
// does not exist, the object field will be set to `null`. By default, if the
// parsed document is missing a mapping key for a given field, an error will be
// raised instead.
treat_omitted_as_null: bool = false,
// If true, if a struct field has a default value associated with it and the
// corresponding mapping key does not exist, the object field will be set to the
// default value. By default, this behavior is enabled, allowing succinct
// representation of objects that have default fields.
allow_omitting_default_values: bool = true,

// Only used by the parseTo family of functions.
// If true, strings may be coerced into other scalar types, like booleans or

@@ -80,13 +80,11 @@ pub const Options = struct {
// an error if the destination is a boolean type. By default, these comparisons are
// case-sensitive. See the `case_insensitive_scalar_coersion` option to change
// this.
boolean_scalars: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
.truthy = &.{ "true", "True", "yes", "on" },
.falsy = &.{ "false", "False", "no", "off" },
},
truthy_boolean_scalars: []const []const u8 = &.{ "true", "True", "yes", "on" },
falsy_boolean_scalars: []const []const u8 = &.{ "false", "False", "no", "off" },

// Only used by the parseTo family of functions.
// A list of strings. Scalars in the doucment that match any of the values listed
// A list of strings. Scalars in the document that match any of the values listed
// will be parsed to optional `null`. Any other scalar value will be parsed as the
// optional child type if the destination type is an optional. By default, these
// comparisons are case-sensitive. See the `case_insensitive_scalar_coersion`

@@ -99,7 +97,9 @@ pub const Options = struct {
// look like source code enum literals. Any enum value missing the leading `.` will
// result in a conversion error. If set to false, no preprocessing will be done
// and enum values will be converted from the literal scalar/string. These two styles
// cannot be mixed in a single document.
// cannot be mixed in a single document. Note that this setting also affects how
// tagged unions are parsed (specifically, the union's field name must also have the
// leading `.` if this option is enabled.)
expect_enum_dot: bool = true,

// Only used by the parseTo family of functions.
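The expanded comment also covers tagged unions; the example document changed earlier in this comparison shows the notation in practice:

```nice
enume: .second
taggart: {.first: string a thing}
```

With `expect_enum_dot = true` (the default) both the enum literal and the union field name carry the leading `.`; with the option disabled both are written without it, and the two styles cannot be mixed within one document.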
@@ -59,7 +59,7 @@ pub const State = struct {

switch (state.mode) {
.initial => switch (options.default_object) {
.string => state.document.root = Value.emptyString(),
.scalar => state.document.root = Value.emptyScalar(),
.list => state.document.root = Value.newList(arena_alloc),
.map => state.document.root = Value.newMap(arena_alloc),
.fail => {

@@ -70,7 +70,7 @@ pub const State = struct {
},
.value => switch (state.value_stack.getLast().*) {
// we have an in-progress string, finish it.
.string => |*string| string.* = try state.string_builder.toOwnedSlice(arena_alloc),
.string => |*string| string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0),
// if we have a dangling -, attach an empty scalar to it
.list => |*list| if (state.expect_shift == .indent) try list.append(Value.emptyScalar()),
// if we have a dangling "key:", attach an empty scalar to it

@@ -185,7 +185,7 @@ pub const State = struct {

if (firstpass and line.shift == .dedent) {
// copy the string into the document proper
string.* = try state.string_builder.toOwnedSlice(arena_alloc);
string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0);

var dedent_depth = line.shift.dedent;
while (dedent_depth > 0) : (dedent_depth -= 1)

@@ -199,9 +199,9 @@ pub const State = struct {
.in_line => |in_line| switch (in_line) {
.empty => unreachable,
inline .line_string, .space_string, .concat_string => |str, tag| {
if (tag == .line_string)
if (comptime tag == .line_string)
try state.string_builder.append(arena_alloc, '\n');
if (tag == .space_string)
if (comptime tag == .space_string)
try state.string_builder.append(arena_alloc, ' ');
try state.string_builder.appendSlice(arena_alloc, str);
},
@@ -802,7 +802,10 @@ pub const State = struct {
return error.DuplicateKey;
},
.use_first => {},
.use_last => gop.value_ptr.* = value,
.use_last => {
_ = map.orderedRemove(key);
map.putAssumeCapacityNoClobber(key, value);
},
}
else
gop.value_ptr.* = value;
@@ -9,6 +9,11 @@
// CONDITIONS OF ANY KIND, either express or implied.

const std = @import("std");
const hasFn = if (@hasDecl(std.meta, "trait")) struct {
fn hasFn(comptime T: type, comptime name: []const u8) bool {
return std.meta.trait.hasFn(name)(T);
}
}.hasFn else std.meta.hasFn;

const Options = @import("../parser.zig").Options;

@@ -51,7 +56,7 @@ pub fn Parsed(comptime T: type) type {
}

pub const Value = union(enum) {
pub const String = []const u8;
pub const String = [:0]const u8;
pub const Map = std.StringArrayHashMap(Value);
pub const List = std.ArrayList(Value);
pub const TagType = @typeInfo(Value).Union.tag_type.?;

@@ -63,6 +68,10 @@ pub const Value = union(enum) {
map: Map,
inline_map: Map,

pub fn FieldConverter(comptime T: type) type {
return *const fn (Value, std.mem.Allocator, Options) error{BadValue}!T;
}

pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
switch (@typeInfo(T)) {
.Void => {

@@ -77,14 +86,14 @@ pub const Value = union(enum) {
inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue;
if (options.case_insensitive_scalar_coersion) {
for (options.boolean_scalars.truthy) |check|
for (options.truthy_boolean_scalars) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return true;
for (options.boolean_scalars.falsy) |check|
for (options.falsy_boolean_scalars) |check|
if (std.ascii.eqlIgnoreCase(str, check)) return false;
} else {
for (options.boolean_scalars.truthy) |check|
for (options.truthy_boolean_scalars) |check|
if (std.mem.eql(u8, str, check)) return true;
for (options.boolean_scalars.falsy) |check|
for (options.falsy_boolean_scalars) |check|
if (std.mem.eql(u8, str, check)) return false;
}

@@ -106,7 +115,7 @@ pub const Value = union(enum) {
switch (self) {
inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue;
return try std.fmt.parseFloat(T, str, 0);
return try std.fmt.parseFloat(T, str);
},
else => return error.BadValue,
}

@@ -120,27 +129,29 @@ pub const Value = union(enum) {
// probably be solved in the zig stdlib or similar.
switch (self) {
.scalar, .string => |str| {
if (ptr.child == u8) {
if (ptr.sentinel) |sent| {
var copy = try allocator.allocSentinel(u8, str.len, @as(*const u8, @ptrCast(sent)).*);
@memcpy(copy, str);
return copy;
}
if (comptime ptr.child == u8) {
if (comptime ptr.sentinel) |sentinel|
if (comptime @as(*align(1) const ptr.child, @ptrCast(sentinel)).* != 0)
return error.BadValue;

return str;
} else {
return error.BadValue;
}
},
.list, .inline_list => |lst| {
var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
errdefer result.deinit();
for (lst.items) |item| {
result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
const result = try allocator.alloc(ptr.child, lst.items.len + @intFromBool(ptr.sentinel != null));

for (result[0..lst.items.len], lst.items) |*res, item| {
res.* = try item.convertTo(ptr.child, allocator, options);
}
if (ptr.sentinel) |sent| {
return try result.toOwnedSliceSentinel(@as(*align(1) const ptr.child, @ptrCast(sent)).*);

if (comptime ptr.sentinel) |sentinel| {
const sval = @as(*align(1) const ptr.child, @ptrCast(sentinel)).*;
result[lst.items.len] = sval;
return result[0..lst.items.len :sval];
} else {
return try result.toOwnedSlice();
return result;
}
},
else => return error.BadValue,

@@ -152,7 +163,7 @@ pub const Value = union(enum) {
result.* = try self.convertTo(ptr.child, allocator, options);
return result;
},
else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)), // do not support many or C item pointers.
else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)),
},
.Array => |arr| {
// TODO: There is ambiguity here because a document expecting a list

@@ -169,21 +180,19 @@ pub const Value = union(enum) {
} else return error.BadValue;
},
.list, .inline_list => |lst| {
var storage = try std.ArrayList(arr.child).initCapacity(allocator, arr.len);
defer storage.deinit();
for (lst.items) |item| {
storage.appendAssumeCapacity(try item.convertTo(arr.child, allocator, options));
}
// this may result in a big stack allocation, which is not ideal
if (lst.items.len != arr.len) return error.BadValue;

var result: T = undefined;
@memcpy(&result, storage.items);
for (&result, lst.items) |*res, item| {
res.* = try item.convertTo(arr.child, allocator, options);
}
return result;
},
else => return error.BadValue,
}
},
.Struct => |stt| {
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
if (comptime hasFn(T, "deserializeNice"))
return T.deserializeNice(self, allocator, options);

if (stt.is_tuple) {

@@ -191,8 +200,8 @@ pub const Value = union(enum) {
.list, .inline_list => |list| {
if (list.items.len != stt.fields.len) return error.BadValue;
var result: T = undefined;
inline for (stt.fields, 0..) |field, idx| {
result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
inline for (stt.fields, &result, list.items) |field, *res, item| {
res.* = try item.convertTo(field.type, allocator, options);
}
return result;
},

@@ -204,33 +213,28 @@ pub const Value = union(enum) {
.map, .inline_map => |map| {
var result: T = undefined;

if (options.ignore_extra_fields) {
inline for (stt.fields) |field| {
if (map.get(field.name)) |value| {
@field(result, field.name) = try value.convertTo(field.type, allocator, options);
} else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
@field(result, field.name) = null;
if (!options.ignore_extra_fields and (map.count() > stt.fields.len))
return error.BadValue;

var use_count: usize = 0;
inline for (stt.fields) |field| {
if (map.get(field.name)) |val| {
if (comptime hasFn(T, "niceFieldConverter") and T.niceFieldConverter(field.name) != null) {
@field(result, field.name) = try T.niceFieldConverter(field.name).?(val, allocator, options);
} else {
return error.BadValue;
@field(result, field.name) = try val.convertTo(field.type, allocator, options);
}
}
} else {
// we could iterate over each map key and do an exhaustive
// comparison with each struct field name. This would save
// memory and it would probably be a fair amount faster for
// small structs.
var clone = try map.clone();
defer clone.deinit();
inline for (stt.fields) |field| {
if (clone.fetchSwapRemove(field.name)) |kv| {
@field(result, field.name) = try kv.value.convertTo(field.type, allocator, options);
} else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
@field(result, field.name) = null;
} else return error.BadValue;
}
// there were extra fields in the data
if (clone.count() > 0) return error.BadValue;
use_count += 1;
} else if (options.allow_omitting_default_values) {
if (comptime field.default_value) |def|
@field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
else
return error.BadValue;
} else return error.BadValue;
}
// there were extra fields in the data
if (!options.ignore_extra_fields and (map.count() > use_count))
return error.BadValue;

return result;
},

@@ -238,7 +242,7 @@ pub const Value = union(enum) {
}
},
.Enum => {
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
if (comptime hasFn(T, "deserializeNice"))
return T.deserializeNice(self, allocator, options);

switch (self) {

@@ -263,25 +267,52 @@ pub const Value = union(enum) {
}
},
.Union => |unn| {
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
if (comptime hasFn(T, "deserializeNice"))
return T.deserializeNice(self, allocator, options);

if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));

switch (self) {
.map, .inline_map => |map| {
// a union may not ever be deserialized from a map with more than one value
// a union may not ever be deserialized from a map with more
// (or less) than one value
if (map.count() != 1) return error.BadValue;
const key = map.keys()[0];
const name = if (options.expect_enum_dot) blk: {
if (key.len > 0 and key[0] == '.')
break :blk key[1..]
else
return error.BadValue;
} else key;

inline for (unn.fields) |field| {
if (std.mem.eql(u8, key, field.name))
if (std.mem.eql(u8, name, field.name))
return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
}
return error.BadValue;
},
// TODO: if the field is a 0 width type like void, we could parse it
// directly from a scalar/string value (i.e. a name with no
// corresponding value)
inline .scalar, .string => |str, tag| {
if (tag == .string and !options.coerce_strings) return error.BadValue;
const name = if (options.expect_enum_dot) blk: {
if (str.len > 0 and str[0] == '.')
break :blk str[1..]
else
return error.BadValue;
} else str;

inline for (unn.fields) |field| {
if (@sizeOf(field.type) != 0) continue;
// this logic may be a little off: comtime_int,
// comptime_float, and type will all have size 0 because
// they can't be used at runtime. On the other hand, trying
// to use them here should result in a compile error? Also,
// it's a 0 sized type so initializing it as undefined
// shouldn't be a problem. As far as I know.
if (std.mem.eql(u8, name, field.name))
return @unionInit(T, field.name, undefined);
}
return error.BadValue;
},
else => return error.BadValue,
}
},

@@ -315,7 +346,7 @@ pub const Value = union(enum) {
}

inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
return @unionInit(Value, @tagName(classification), try alloc.dupe(u8, input));
return @unionInit(Value, @tagName(classification), try alloc.dupeZ(u8, input));
}

pub inline fn emptyScalar() Value {
tests/main.zig — new file, 5 lines

@@ -0,0 +1,5 @@
comptime {
if (@import("builtin").is_test) {
_ = @import("./reify.zig");
}
}
tests/reify.zig — new file, 144 lines

@@ -0,0 +1,144 @@
const std = @import("std");

const nice = @import("nice");

fn reifyScalar(comptime scalar: []const u8, expected: anytype) !void {
try reifyScalarWithOptions(scalar, expected, .{});
}

fn reifyScalarWithOptions(comptime scalar: []const u8, expected: anytype, options: nice.parser.Options) !void {
const allocator = std.testing.allocator;
var diagnostics = nice.Diagnostics{};
const parsed = try nice.parseBufferTo(
@TypeOf(expected),
allocator,
scalar ++ "\n",
&diagnostics,
options,
);
defer parsed.deinit();

try std.testing.expectEqual(expected, parsed.value);
}

test "reify integer" {
try reifyScalar("123", @as(u8, 123));
try reifyScalar("0123", @as(u8, 123));
try reifyScalar("1_23", @as(u8, 123));
try reifyScalar("-01_23", @as(i8, -123));
}

test "reify hexadecimal" {
try reifyScalar("0x123", @as(i64, 0x123));
try reifyScalar("0x0123", @as(i64, 0x123));
try reifyScalar("0x01_23", @as(i64, 0x123));
try reifyScalar("-0x01_23", @as(i64, -0x123));
}

test "reify octal" {
try reifyScalar("0o123", @as(i64, 0o123));
try reifyScalar("0o0123", @as(i64, 0o123));
try reifyScalar("0o01_23", @as(i64, 0o123));
try reifyScalar("-0o01_23", @as(i64, -0o123));
}

test "reify binary" {
try reifyScalar("0b1011", @as(i5, 0b1011));
try reifyScalar("0b01011", @as(i5, 0b1011));
try reifyScalar("0b010_11", @as(i5, 0b1011));
try reifyScalar("-0b010_11", @as(i5, -0b1011));
}

test "reify float" {
try reifyScalar("0.25", @as(f32, 0.25));
try reifyScalar("0.2_5", @as(f32, 0.25));
try reifyScalar("00.250", @as(f32, 0.25));
try reifyScalar("-0.25", @as(f32, -0.25));
}

test "reify hexfloat" {
try reifyScalar("0x0.25", @as(f64, 0x0.25));
try reifyScalar("0x0.2_5", @as(f64, 0x0.25));
try reifyScalar("0x0.250p1", @as(f64, 0x0.25p1));
try reifyScalar("-0x0.25", @as(f64, -0x0.25));
}

test "reify true" {
try reifyScalar("true", true);
try reifyScalar("True", true);
try reifyScalar("yes", true);
try reifyScalar("on", true);
}

test "reify false" {
try reifyScalar("false", false);
try reifyScalar("False", false);
try reifyScalar("no", false);
try reifyScalar("off", false);
}

test "reify custom true" {
const options = nice.parser.Options{ .truthy_boolean_scalars = &.{"correct"} };
try reifyScalarWithOptions("correct", true, options);
}

test "reify true case insensitive" {
try std.testing.expectError(error.BadValue, reifyScalar("TRUE", true));
const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
try reifyScalarWithOptions("TRUE", true, options);
}

test "reify custom false" {
const options = nice.parser.Options{ .falsy_boolean_scalars = &.{"incorrect"} };
try reifyScalarWithOptions("incorrect", false, options);
}

test "reify false case insensitive" {
try std.testing.expectError(error.BadValue, reifyScalar("FALSE", false));
const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
try reifyScalarWithOptions("FALSE", false, options);
}

test "reify null" {
try reifyScalar("null", @as(?u8, null));
try reifyScalar("nil", @as(?u8, null));
try reifyScalar("None", @as(?u8, null));
}

test "reify custom null" {
const options = nice.parser.Options{ .null_scalars = &.{"nothing"} };
try reifyScalarWithOptions("nothing", @as(?u8, null), options);
}

test "reify null case insensitive" {
// this is a little weird because when the null string mismatches, it will try to
// parse the child optional type and produce either a value or an error from that,
// so the error received depends on whether or not the optional child type fails to
// parse the given value.
try std.testing.expectError(error.InvalidCharacter, reifyScalar("NULL", @as(?u8, null)));
const options = nice.parser.Options{ .case_insensitive_scalar_coersion = true };
try reifyScalarWithOptions("NULL", @as(?u8, null), options);
}

test "reify void" {
// A void scalar cannot exist on its own as it is not distinguishable from an empty
// document.
const Void = struct { void: void };
try reifyScalar("void:", Void{ .void = void{} });
}

test "reify void scalar" {
const options = nice.parser.Options{ .default_object = .scalar };
try reifyScalarWithOptions("", void{}, options);
}

test "reify enum" {
const Enum = enum { one, two };
try reifyScalar(".one", Enum.one);
}

test "reify enum no dot" {
const options = nice.parser.Options{ .expect_enum_dot = false };
const Enum = enum { one, two };
try reifyScalarWithOptions("two", Enum.two, options);
}