Compare commits
35 Commits
1683197bc0
...
master
Author | SHA1 | Date | |
---|---|---|---|
e562e30e5e
|
|||
8aaceba484
|
|||
c74d615131
|
|||
8ccb2c3a66
|
|||
ad73ea6508
|
|||
875b1b6344
|
|||
ea52c99fee
|
|||
dbf2762982
|
|||
0f4a9fcaa7
|
|||
bd079b42d9
|
|||
bd0d74ee6a
|
|||
2208079355
|
|||
98eac68929
|
|||
39619e7d6b
|
|||
33ab092a06
|
|||
21a9753d46
|
|||
e8ddee5ab2
|
|||
2f90ccba6f
|
|||
d6e1e85ea1
|
|||
ed913ab3a3
|
|||
73575a43a7
|
|||
1c5d7af552
|
|||
f371aa281c
|
|||
ce65dee71f
|
|||
f371f16e2f
|
|||
f381edfff3
|
|||
6d2c08878d
|
|||
cca7d61666
|
|||
4690f0b808
|
|||
1f75ff6b8a
|
|||
c83558de3e
|
|||
4c966ca9d0
|
|||
25386ac87a
|
|||
8dd5463683
|
|||
7db6094dd5
|
26
build.zig
26
build.zig
@@ -2,11 +2,26 @@ const std = @import("std");
|
||||
|
||||
pub fn build(b: *std.Build) void {
|
||||
const target = b.standardTargetOptions(.{});
|
||||
const optimize = b.standardOptimizeOption(.{});
|
||||
|
||||
const nice = b.addModule("nice", .{
|
||||
.source_file = .{ .path = "src/nice.zig" },
|
||||
.root_source_file = b.path("src/nice.zig"),
|
||||
});
|
||||
|
||||
const tests = b.addTest(.{
|
||||
.name = "nice-unit-tests",
|
||||
.root_source_file = b.path("tests/main.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
|
||||
tests.root_module.addImport("nice", nice);
|
||||
|
||||
const run_main_tests = b.addRunArtifact(tests);
|
||||
const test_step = b.step("test", "Run tests");
|
||||
test_step.dependOn(&b.addInstallArtifact(tests, .{}).step);
|
||||
test_step.dependOn(&run_main_tests.step);
|
||||
|
||||
add_examples(b, .{
|
||||
.target = target,
|
||||
.nice_mod = nice,
|
||||
@@ -14,7 +29,7 @@ pub fn build(b: *std.Build) void {
|
||||
}
|
||||
|
||||
const ExampleOptions = struct {
|
||||
target: std.zig.CrossTarget,
|
||||
target: std.Build.ResolvedTarget,
|
||||
nice_mod: *std.Build.Module,
|
||||
};
|
||||
|
||||
@@ -26,20 +41,21 @@ const Example = struct {
|
||||
const examples = [_]Example{
|
||||
.{ .name = "parse", .file = "examples/parse.zig" },
|
||||
.{ .name = "stream", .file = "examples/stream.zig" },
|
||||
.{ .name = "reify", .file = "examples/reify.zig" },
|
||||
};
|
||||
|
||||
pub fn add_examples(b: *std.build, options: ExampleOptions) void {
|
||||
pub fn add_examples(b: *std.Build, options: ExampleOptions) void {
|
||||
const example_step = b.step("examples", "build examples");
|
||||
|
||||
inline for (examples) |example| {
|
||||
const ex_exe = b.addExecutable(.{
|
||||
.name = example.name,
|
||||
.root_source_file = .{ .path = example.file },
|
||||
.root_source_file = b.path(example.file),
|
||||
.target = options.target,
|
||||
.optimize = .Debug,
|
||||
});
|
||||
|
||||
ex_exe.addModule("nice", options.nice_mod);
|
||||
ex_exe.root_module.addImport("nice", options.nice_mod);
|
||||
const install = b.addInstallArtifact(ex_exe, .{});
|
||||
example_step.dependOn(&install.step);
|
||||
}
|
||||
|
@@ -1,5 +1,12 @@
|
||||
.{
|
||||
.name = "nice-data",
|
||||
.name = "nice",
|
||||
.version = "0.1.0-pre",
|
||||
.dependencies = .{},
|
||||
.paths = .{
|
||||
"src",
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
"license",
|
||||
"readme.md",
|
||||
},
|
||||
}
|
||||
|
@@ -1,3 +1,6 @@
|
||||
// This example is dedicated to the public domain or, where that is not possible,
|
||||
// licensed under CC0-1.0, available at https://spdx.org/licenses/CC0-1.0.html
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const nice = @import("nice");
|
||||
|
104
examples/reify.zig
Normal file
104
examples/reify.zig
Normal file
@@ -0,0 +1,104 @@
|
||||
// This example is dedicated to the public domain or, where that is not possible,
|
||||
// licensed under CC0-1.0, available at https://spdx.org/licenses/CC0-1.0.html
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const nice = @import("nice");
|
||||
|
||||
const Enum = enum { first, second, third };
|
||||
const TagUnion = union(Enum) { first: []const u8, second: i32, third: void };
|
||||
|
||||
const Example = struct {
|
||||
useful: bool,
|
||||
number: i32,
|
||||
string: []const u8,
|
||||
longstring: [:0]const u8,
|
||||
tuple: struct { bool, i8 },
|
||||
enume: Enum,
|
||||
taggart: TagUnion,
|
||||
voidtag: TagUnion,
|
||||
exist: ?bool,
|
||||
again: ?bool,
|
||||
array: [5]i16,
|
||||
nested: [3]struct { index: usize, title: []const u8 },
|
||||
default: u64 = 0xDEADCAFE,
|
||||
};
|
||||
|
||||
const source =
|
||||
\\useful: true
|
||||
\\number: 0x9001
|
||||
\\string: > salutations, earthen oblate spheroid
|
||||
\\
|
||||
\\longstring:
|
||||
\\ | If, at first, you don't think this string has
|
||||
\\ + multiple lines, then perhaps you are the one who is
|
||||
\\ # yeah, let's add a newline here
|
||||
\\ > wrong.
|
||||
\\ # and a trailing newline for good measure
|
||||
\\ >
|
||||
\\tuple: [ no, 127 ]
|
||||
\\enume: .second
|
||||
\\taggart: {.first: string a thing}
|
||||
\\voidtag: .third
|
||||
\\list:
|
||||
\\ - I am a list item
|
||||
\\exist: null
|
||||
\\again: true
|
||||
\\array: [ 1, 2, 3, 4, 5 ]
|
||||
\\nested:
|
||||
\\ - { index: 1, title: none }
|
||||
\\ - { index: 2, title: such }
|
||||
\\ - { index: 3, title: luck }
|
||||
\\
|
||||
;
|
||||
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
const allocator = gpa.allocator();
|
||||
|
||||
var diagnostics = nice.Diagnostics{};
|
||||
var loaded = nice.parseBufferTo(Example, allocator, source, &diagnostics, .{}) catch |err| {
|
||||
std.debug.print("row:{d} col:{d}: {s}\n", .{
|
||||
diagnostics.row,
|
||||
diagnostics.line_offset,
|
||||
diagnostics.message,
|
||||
});
|
||||
return err;
|
||||
};
|
||||
defer loaded.deinit();
|
||||
|
||||
std.debug.print("{s} {{\n", .{@typeName(Example)});
|
||||
std.debug.print(" useful: {}\n", .{loaded.value.useful});
|
||||
std.debug.print(" number: {d}\n", .{loaded.value.number});
|
||||
std.debug.print(" string: {s}\n", .{loaded.value.string});
|
||||
std.debug.print(" longstring: {s}\n", .{loaded.value.longstring});
|
||||
std.debug.print(" tuple: {{ {}, {d} }}\n", .{ loaded.value.tuple[0], loaded.value.tuple[1] });
|
||||
std.debug.print(" enume: .{s}\n", .{@tagName(loaded.value.enume)});
|
||||
std.debug.print(" taggart: ", .{});
|
||||
switch (loaded.value.taggart) {
|
||||
.first => |val| std.debug.print(".first = {s}\n", .{val}),
|
||||
.second => |val| std.debug.print(".second = {d}\n", .{val}),
|
||||
.third => std.debug.print(".third\n", .{}),
|
||||
}
|
||||
std.debug.print(" voidtag: ", .{});
|
||||
switch (loaded.value.voidtag) {
|
||||
.first => |val| std.debug.print(".first = {s}\n", .{val}),
|
||||
.second => |val| std.debug.print(".second = {d}\n", .{val}),
|
||||
.third => std.debug.print(".third\n", .{}),
|
||||
}
|
||||
std.debug.print(" exist: {?}\n", .{loaded.value.exist});
|
||||
std.debug.print(" again: {?}\n", .{loaded.value.again});
|
||||
std.debug.print(" array: [ ", .{});
|
||||
for (loaded.value.array) |item| {
|
||||
std.debug.print("{d}, ", .{item});
|
||||
}
|
||||
std.debug.print("]\n", .{});
|
||||
std.debug.print(" nested: [\n", .{});
|
||||
for (loaded.value.nested) |item| {
|
||||
std.debug.print(" {{ index: {d}, title: {s} }}\n", .{ item.index, item.title });
|
||||
}
|
||||
std.debug.print(" ]\n", .{});
|
||||
std.debug.print(" default: 0x{X}\n", .{loaded.value.default});
|
||||
std.debug.print("}}\n", .{});
|
||||
}
|
@@ -1,3 +1,6 @@
|
||||
// This example is dedicated to the public domain or, where that is not possible,
|
||||
// licensed under CC0-1.0, available at https://spdx.org/licenses/CC0-1.0.html
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const nice = @import("nice");
|
||||
|
7
license
Normal file
7
license
Normal file
@@ -0,0 +1,7 @@
|
||||
Copyright 2023 torque@epicyclic.dev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
325
readme.md
Normal file
325
readme.md
Normal file
@@ -0,0 +1,325 @@
|
||||
Have you ever wished someone would walk up to you and say, in a tremendously exaggerated, stereotypical surfer voice, "nice data, dude"? Well, wish no longer because now your data can be Nice by definition, due to our patented Manipulative Marketing Naming technique. Introducing!
|
||||
|
||||
# Nice Data: There's no Escape
|
||||
|
||||
```nice
|
||||
# this is an example of some Nice data.
|
||||
project:
|
||||
name: Nice data
|
||||
description:
|
||||
| A file format for storing structured data. Nice uses syntactic whitespace
|
||||
+ to represent the data structure. It defines two types of data, scalars and
|
||||
+ strings, which are used to compose its two data structures, lists and maps.
|
||||
>
|
||||
> Nice to write, Nice to read.
|
||||
inspiration:
|
||||
- { name: NestedText, url: https://nestedtext.org }
|
||||
- { name: YAML, url: https://yaml.org }
|
||||
non-goals: [ general-purpose data serialization, world domination ]
|
||||
epic freaking funny number lol: 42069580089001421337666
|
||||
```
|
||||
|
||||
Nice Data is a format for storing structured data in a file. It's pleasant to read and adheres to the philosophy that form should match structure. It's heavily inspired by [NestedText], though it also looks similar to [YAML].
|
||||
|
||||
## Syntax
|
||||
|
||||
For the purposes of illustration, the following syntax examples are accompanied by their corresponding JSON representation. If you are not already familiar with JSON syntax, I would certainly like to know how you got here.
|
||||
|
||||
### Comments
|
||||
|
||||
A comment is any line starting with the octothorp, or perhaps the number sign or pound sign or hash or sharp symbol, followed by a space. The space is not optional and it is a syntax error if it is omitted. The comment continues to the end of a line. Comments must occupy their own line, and they do not need to respect the document indentation (though this is strongly encouraged). Comments should be considered to be "attached" to the line below them.
|
||||
|
||||
```nice
|
||||
# this is an example of a comment
|
||||
# nothing stops the creation of multiple adjacent comments
|
||||
# it's good to explain what you are doing
|
||||
```
|
||||
|
||||
### Scalar Values
|
||||
|
||||
A scalar value is a sequence of valid UTF-8 codepoints. Scalars cannot contain leading or trailing spaces (specifically ASCII `0x20`) or the ASCII linefeed character but otherwise may contain any valid printing character. A scalar is essentially a UTF-8 string, but in a way that indicates that the parser can interpret it as a different type, if necessary. The following are a few examples of valid scalar values (note that a scalar may not occupy more than one line).
|
||||
|
||||
- `100`
|
||||
- `spaces inside the scalar are no problem`
|
||||
- `2023-10-19 07:16:38Z`
|
||||
|
||||
### String Values
|
||||
|
||||
A string value is very similar to a scalar value, except that it is started by a leader character sequence and ended with a trailer character sequence. Strings may be spread across multiple lines (here, we call each line a string fragment), and each fragment must start with a leader and end with the trailer. String fragments respect leading whitespace (after the leader sequence), unlike scalars. The trailer may be used to include trailing whitespace in a fragment. Comments may be interspersed between the fragments that compose a string (demonstrated below).
|
||||
|
||||
The string leader sequence consists of an ASCII character followed by a single ASCII space. The space must be omitted if the fragment contains no other characters (because otherwise it would be trailing whitespace, which is forbidden). The leader sequence defines how the fragments of the string are concatenated together, as follows:
|
||||
|
||||
- `| ` specifies that this fragment of the string should be directly concatenated onto the previous fragment.
|
||||
|
||||
```nice
|
||||
| ABCDEFGHIJKLM
|
||||
| NOPQRSTUVWXYZ
|
||||
```
|
||||
|
||||
parses to the string `"ABCDEFGHIJKLMNOPQRSTUVWXYZ"`
|
||||
|
||||
- `+ ` specifies that this fragment of the string should have a space prepended to it and then be concatenated onto the previous fragment.
|
||||
|
||||
```nice
|
||||
| hello
|
||||
+ to the
|
||||
+ world
|
||||
```
|
||||
|
||||
parses to the string `"hello to the world"`. This also demonstrates that different fragment leaders may be intermixed.
|
||||
|
||||
- `> ` specifies that this fragment of the string should have a linefeed character prepended to it and then be concatenated onto the previous fragment.
|
||||
|
||||
```nice
|
||||
> my
|
||||
# the leading space in this fragment is preserved
|
||||
> multiline
|
||||
>
|
||||
> string
|
||||
# this is used to add a trailing newline
|
||||
>
|
||||
```
|
||||
|
||||
parses to the string `"my\n multiline\n\nstring\n"`.
|
||||
|
||||
Note that the leader of the first fragment of a string has no effect on the string, and may be any of the three options, but using `| ` is recommended.
|
||||
|
||||
The standard fragment trailer is just the normal literal linefeed character `"\n"`, as shown in the examples above. However, because Nice does not permit trailing whitespace syntactically, a string fragment may use the pipe character `|` as a trailer. If the last character in a string fragment is `|`, it will be stripped from the fragment while preserving the rest of the line. If a string fragment needs to end with a pipe character, the pipe must be doubled, as the last `|` will be stripped from the fragment.
|
||||
|
||||
```nice
|
||||
| lots of |
|
||||
| space
|
||||
# to end a string line with a pipe character, it must be doubled. Pipes within
|
||||
# the line are not special in any way
|
||||
> | many | pipes | abound ||
|
||||
```
|
||||
|
||||
parses to the string `"lots of space\n| many | pipes | abound |"`.
|
||||
|
||||
Consider also that composing the above rules, an empty string fragment may be represented either with `|` or with `| |`. The former is preferred.
|
||||
|
||||
### Lists
|
||||
|
||||
A list is an ordered sequence of values. These values may be scalars, strings, other lists, or maps. List items are introduced with the sequence `- ` (ASCII minus followed by a space). Similar to string fragment leaders, if a list item is empty, the trailing space of the introducer must be omitted. Comments may be interspersed between subsequent list items. An example:
|
||||
|
||||
```nice
|
||||
- a list
|
||||
# this is an inline string
|
||||
- > containing
|
||||
# this is an empty list item
|
||||
-
|
||||
-
|
||||
| several
|
||||
+ values
|
||||
```
|
||||
|
||||
parses to the following JSON structure:
|
||||
|
||||
```JSON
|
||||
["a list", "containing", "", "several values"]
|
||||
```
|
||||
|
||||
There are a couple of new concepts here. The first new concept is demonstrated in the second value, which is an inline string. This is a standard string fragment that appears on the same line after another introducer (either a list item introducer, as in this example, or a map key introducer, which will be demonstrated in the section describing maps). The only difference between an inline string and a normal string as discussed above is that the inline string is composed of only a single fragment (meaning it cannot be spread across multiple lines). The string leader used has no effect on an inline string, since the leader is not applied.
|
||||
|
||||
The other new concept is structural indentation. The fourth list item contains an indented string following a list item introducer that does not contain an inline value. Because the string sequence is indented, it belongs to the list item introduced immediately before it. Note that an indented sequence following an introducer that contains an inline value is a syntactic error. That is, the following document **cannot** be parsed:
|
||||
|
||||
```nice
|
||||
- inline value
|
||||
> invalid subsequent indented value
|
||||
```
|
||||
|
||||
Indentation is how all parent-child structural relationships are represented in Nice. Here's an example of nesting multiple lists:
|
||||
|
||||
```nice
|
||||
- start the parent
|
||||
-
|
||||
- this is a child item
|
||||
-
|
||||
- grandchild here
|
||||
- back to the child
|
||||
-
|
||||
- another grandchild
|
||||
- finish the parent
|
||||
```
|
||||
|
||||
which parses to the following JSON structure:
|
||||
|
||||
```JSON
|
||||
[
|
||||
"start the parent",
|
||||
[
|
||||
"this is a child item",
|
||||
[
|
||||
"grandchild here"
|
||||
],
|
||||
"back to the child",
|
||||
[
|
||||
"another grandchild"
|
||||
]
|
||||
],
|
||||
"finish the parent"
|
||||
]
|
||||
```
|
||||
|
||||
The Nice document is similar in layout to its indented JSON counterpart but contains somewhat less bracketry.
|
||||
|
||||
### Inline Lists
|
||||
|
||||
Inline lists allow a list to be specified in a more concise form on a line following another item introducer (either a list item introducer or a map item introducer). They consist of a comma-separated sequence of scalars within a pair of square brackets (`[` and `]`). Inline lists may also contain other inline lists and inline maps (discussed later), but they cannot contain strings. Whitespace before and after values in an inline list is ignored, though whitespace within a value is preserved. Inline list values may not contain commas. For reasons related to intellectual bankruptcy, `[]` and `[ ]` are distinct values, just as they are in NestedText. `[]` represents an empty list, while `[ ]` represents a list containing a single empty string. As is hopefully suggested by the name, an inline list *must* be specified on a single line.
|
||||
|
||||
Inline lists are provided for when some parts of a document may benefit from having horizontal layout rather than vertical layout. It can also be used tactically to improve readability in other ways, but should not, in general, be preferred over standard lists. Here's the previous example, with a bit less indentation thanks to use of inline lists:
|
||||
|
||||
```nice
|
||||
- start the parent
|
||||
-
|
||||
- this is a child item
|
||||
- [ grandchild here ]
|
||||
- back to the child
|
||||
- [ another grandchild ]
|
||||
- finish the parent
|
||||
```
|
||||
|
||||
Of course, this document could be represented using inline lists exclusively, but this is not recommended:
|
||||
|
||||
```nice
|
||||
[ start the parent, [ this is a child item, [ grandchild here ], back to the child, [ another grandchild ] ], finish the parent ]
|
||||
```
|
||||
|
||||
Hopefully you agree that readability suffers when a more complex hierarchy is jammed into an inline list. However, judicious use can dramatically improve readability, such as in the case of representing a 2D data structure with a list of lists:
|
||||
|
||||
```nice
|
||||
- [ 1, 2, 3, 4, 5, 6 ]
|
||||
- [ 7, 8, 9, 10, 11, 12 ]
|
||||
- [ -1, -2, -3, -4, -5, -8 ]
|
||||
```
|
||||
|
||||
### Maps
|
||||
|
||||
A map is a data structure consisting of a sequence of pairs, with each pair being composed of a key and value. A map may represent a general-purpose pair-based data structure such as a hashtable, or it may represent a strictly defined data type with a fixed number of named fields, like a C `struct`. The keys of the map are exclusively scalars, but the corresponding values may be any Nice type or scalar, including scalars, strings, lists, or other maps.
|
||||
|
||||
A map item is introduced by the key scalar. A key scalar is a scalar value that is terminated with an ASCII colon followed by a space `: `. The `:` is removed from the end of the key scalar when parsing. Key scalars may not begin with a sequence that is used for introducing a different type, which means that map keys cannot start with `#` (comments), `- ` (list item introducer), `+ `, `| `, `> ` (string fragment leaders), `[` (inline lists), or `{` (inline maps). `-`, `+`, `|`, and `>` without a following space may be used to begin map keys unambiguously, but `#`, `[`, and `{` are always forbidden. Additionally, key scalars may not contain a colon `:`. Comments may intersperse map pairs. As with the other introducers, if the key scalar is the only item on a line, it must not have a trailing space.
|
||||
|
||||
Enough talk, have an example:
|
||||
|
||||
```nice
|
||||
a scalar: value
|
||||
a string:
|
||||
| hello
|
||||
+ from a map
|
||||
inline string: | hello from a map
|
||||
a list:
|
||||
- true
|
||||
- false
|
||||
- null
|
||||
inline list: [ 1, 2, 3 ]
|
||||
a map:
|
||||
nested:
|
||||
several: levels
|
||||
an empty value:
|
||||
```
|
||||
|
||||
This maps to the following JSON structure:
|
||||
|
||||
```JSON
|
||||
{
|
||||
"a scalar": "value",
|
||||
"a string": "hello from a map",
|
||||
"inline string": "hello from a map",
|
||||
"a list": ["true", "false", "null"],
|
||||
"inline list": ["1", "2", "3"],
|
||||
"a map": { "nested": { "several": "levels" } },
|
||||
"an empty value": ""
|
||||
}
|
||||
```
|
||||
|
||||
Serialized maps are inherently ordered, but the data structures they represent do not necessarily preserve order. Nice guarantees that the order of the map keys, as they were encountered in the document, is preserved. Serialized maps can also represent multiple entries that have the same key. This is not generally useful (if you need to have multiple values for a given key, its corresponding value should be a list) and cannot typically be represented by a map data structure. The Nice parser can be configured to produce a parse error when a duplicate key is encountered (the default behavior) or it can preserve either only first encountered duplicate value or only the last encountered duplicate value (in this case, the map order preserves the index of the last encountered duplicate, which may be less efficient if many duplicates exist, since it requires performing an ordered remove on the previously encountered instance).
|
||||
|
||||
ASCII spaces following the key scalar will be ignored, allowing adjacent values to be justified. The key scalar itself may not contain trailing or leading whitespace. A line only ever contains a single key scalar, unlike YAML. Maps must be nested using structural indentation.
|
||||
|
||||
```nice
|
||||
fully aligned: value: 1
|
||||
values: value: 2
|
||||
```
|
||||
|
||||
```JSON
|
||||
{
|
||||
"fully aligned": "value: 1",
|
||||
"values": "value: 2"
|
||||
}
|
||||
```
|
||||
|
||||
### Inline Maps
|
||||
|
||||
The final syntactic construct is the inline map, which is, as its name hopefully suggests, the map equivalent of an inline list. An inline map is introduced by an opening curly brace `{` and closed by an opposing brace `}`. An inline map consists of a sequence of key-value pairs with the keys being separated from the values by the `:` character. An inline map may contain scalars, inline lists, and other inline maps as values, and all of its keys must be scalars. As with inline lists, whitespace surrounding values is ignored, and whitespace preceding keys is also ignored (there must be no whitespace between the key and its following `:`).
|
||||
|
||||
```nice
|
||||
an example: { this: is, an inline: map }
|
||||
nests:
|
||||
- { a list: [ of, { inline: maps } ] }
|
||||
```
|
||||
|
||||
```JSON
|
||||
{
|
||||
"an example": {"this": "is", "an inline": "map"},
|
||||
"nests": [
|
||||
{ "a list": [ "of", { "inline": "maps" } ] }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Restrictions
|
||||
|
||||
Nice documents must be encoded in valid UTF-8 with no BOM. They must use `LF`-only newlines (`CR` characters are forbidden). Tabs and spaces cannot be mixed for indentation. Indentation *must* adhere to a consistent quantum throughout the whole document, including on comment lines. Nonprinting ASCII characters are forbidden (specifically, any character less than `0x20` (space) except for `0x09` (horizontal tab) and `0x0A` (newline)). Trailing whitespace, including lines consisting only of whitespace, is forbidden, although empty lines are permitted. Some keys and values cannot be represented (for example, map keys cannot start with the character `#`, though map values can).
|
||||
|
||||
## Philosophy
|
||||
|
||||
### Let the Application Interpret Data Types (Bring Your Own Schema)
|
||||
|
||||
An arbitrarily structured data format with strict types adds complexity to the parser and cannot possibly cover all necessary types needed for every possible application. For example, numbers in JSON are represented by a sequence of ASCII characters, but they are defined by the format to be restricted to specifying double precision floating point numbers. Of course, it is possible to generate a numeric ASCII sequence that does not fit into a double precision floating point number. If an application needs to represent a 64-bit integer in JSON without producing technically invalid JSON, the value must be serialized as a string, which places the burden of decoding it on the application, since the format cannot represent it as a direct numeric value. The same is true of an RFC 3339 datetime. It's not possible for a format to account for every possible data type that an application may need, so don't bother. Users are encouraged to parse Nice documents directly into well-defined, typed structures. If you're interested, the NestedText documentation contains [several examples of why having strict data types in your serialization format is not as useful as you think][only-strings].
|
||||
|
||||
Nice explicitly differentiates between bare scalars and strings so that `null` may be disambiguated and interpreted differently from `"null"`.
|
||||
|
||||
### Fewer Rules over Flexibility
|
||||
|
||||
Nice is not, and does not try to be, a general-purpose data serialization format. There are, in fact, many values that simply cannot be represented Nicely. For example, map keys cannot start with a variety of characters, including `#`, `{`, `[`, or whitespace, which is a conscious design choice. In general, Nice is not a format designed with any emphasis placed on ease of programmatic production. While creating software that produces valid Nice data is certainly possible, this reference implementation has no functionality to do so.
|
||||
|
||||
### There's No Need to Conquer the World
|
||||
|
||||
Nice has no exhaustive specification or formal grammar. The parser is handwritten, and there are pretty much guaranteed to be some strange edge cases that weren't considered when writing it. Standardization is a good thing, generally speaking, but it's not a goal here. Perhaps this is driven by the author's indolence more than deep philosophical zealotry. On the other hand, this paragraph is under the philosophy section.
|
||||
|
||||
# The Implementation
|
||||
|
||||
The Reference™ Nice parser/deserializer is this Zig library. It contains a handwritten nonrecursive parser to a generic data structure (`nice.Value`, a tagged union that can represent a scalar, a string, a list of these generic values, or a map of scalars to these generic values). The included example scripts demonstrate how to use the API. See `examples/parse.zig` for one-shot parsing from a slice. `examples/stream.zig` demonstrates how to parse streaming data that does not require loading a whole document into memory at once. This is slower but will generally have a lower peak memory usage (though that is mainly driven by the size of the document).
|
||||
|
||||
`nice.Value` has a method to recursively be converted into a strongly
|
||||
typed user-defined structure. Zig's compile-time reflection is used to generate code to perform appropriate type conversion. There are a variety of options which can be used to control specific details of the conversion, which are governed by `nice.parser.Options`. `examples/reify.zig` demonstrates basic use of this functionality.
|
||||
|
||||
A reference to a `nice.Diagnostics` object with a lifecycle at least as long as the parser must always be provided when parsing. If the source document could not be parsed, this diagnostic object will contain a human-readable explanation of the invalid syntax in the source document that caused the parser to error.
|
||||
|
||||
## Memory Strategy
|
||||
|
||||
The parser wraps a user-provided allocator in an arena, which is used for all internal allocations. All parsed values are copied into the arena rather than storing references to the source document. The parse result contains a reference to the arena, which can be used to free all of the data allocated during parsing.
|
||||
|
||||
# Disclaimer
|
||||
|
||||
It's entirely possible you hate this and think it's not, in fact, a nice data format. That's fine, but, unfortunately, you forgot to make a time machine and go back in time to make me name it something else. And yeah, this is probably impossible to search for.
|
||||
|
||||
# FAQ
|
||||
|
||||
Q: This is so similar to NestedText, why on earth didn't you just implement that?
|
||||
|
||||
A: In my opinion, it's extremely stupid that NestedText does not support indentation using tabs. Also, trailing whitespace is 100% satanic (in the bad way). And if an implementation is going to diverge there, it might as well roll in some other ideas, call it a new format, and just ruin the world with one more slightly-incompatible thing.
|
||||
|
||||
Q: Why is this documentation kind of bad?
|
||||
|
||||
A: I'll be honest, I ran out of steam while writing it. For a format that probably nobody besides me will ever use because there's so much open source code in the world that anything without heavy marketing tends to die in obscurity, it's a lot of work to write down the things I already know. But I have put an FAQ section here, while also indicating nobody has ever asked questions about this. Hmm.
|
||||
|
||||
# License
|
||||
|
||||
What are you going to do, steal my open-source code? Oh, noooooooooo. Here, let me help you.
|
||||
|
||||
Library is licensed MIT, examples are Public Domain/CC0. See file headers and the file `license` in the source tree for details.
|
||||
|
||||
[NestedText]: https://nestedtext.org
|
||||
[only-strings]: https://nestedtext.org/en/latest/alternatives.html#only-strings
|
||||
[YAML]: https://yaml.org
|
@@ -1,3 +1,13 @@
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Diagnostics = @import("./parser.zig").Diagnostics;
|
||||
|
64
src/nice.zig
64
src/nice.zig
@@ -1,63 +1,12 @@
|
||||
// Heavily inspired by, but not quite compatible with, NestedText. Key differences:
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// - Doesn't support multiline keys (this means map keys cannot start with
|
||||
// ' ', \t, #, {, [, |, or >, and they cannot contain :)
|
||||
// - Allows using tabs for indentation (but not mixed tabs/spaces)
|
||||
// - Indentation must be quantized consistently throughout the document. e.g.
|
||||
// every nested layer being exactly 2 spaces past its parent. Tabs may
|
||||
// only use one tab per indentation level.
|
||||
// - Allows flow-style lists, maps, and strings on the same line as map keys or
|
||||
// list items (i.e. the following are legal):
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// key: {inline: map}
|
||||
// key: [inline, list]
|
||||
// key: > inline string
|
||||
// - {map: item}
|
||||
// - [list, item]
|
||||
// - > inline string
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// The string case retains the possibility of having an inline map value starting
|
||||
// with {, [, or >
|
||||
// - a map keys and list item dashes must be followed by a value or an indented
|
||||
// section to reduce parser quantum state. This means that
|
||||
//
|
||||
// foo:
|
||||
// bar: baz
|
||||
//
|
||||
// or
|
||||
//
|
||||
// -
|
||||
// - qux
|
||||
//
|
||||
// are not valid. This can be represented with an inline empty string after foo:
|
||||
//
|
||||
// foo: >
|
||||
// bar: baz
|
||||
//
|
||||
// or
|
||||
//
|
||||
// - >
|
||||
// - qux
|
||||
//
|
||||
// - newlines are strictly LF, if the parser finds CR, it is an error
|
||||
// - blank lines may not contain any whitespace characters except the single LF
|
||||
// - Additional string indicator `|` for soft-wrapped strings, i.e.
|
||||
//
|
||||
// key: | this is not special
|
||||
// key:
|
||||
// | these lines are
|
||||
// | soft-wrapped
|
||||
//
|
||||
// soft-wrapped lines are joined with a ' ' instead of a newline character.
|
||||
// Like multiline strings, the final space is stripped (I guess this is a very
|
||||
// janky way to add trailing whitespace to a string).
|
||||
//
|
||||
// - terminated strings to allow trailing whitespace:
|
||||
// | this string has trailing whitespace |
|
||||
// > and so does this one |
|
||||
// - The parser is both strict and probably sloppy and may have weird edge
|
||||
// cases since I'm slinging code, not writing a spec. For example, tabs are
|
||||
// not trimmed from the values of inline lists/maps
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
@@ -65,6 +14,7 @@ pub const buffers = @import("./linebuffer.zig");
|
||||
pub const tokenizer = @import("./tokenizer.zig");
|
||||
pub const parser = @import("./parser.zig");
|
||||
pub const parseBuffer = parser.parseBuffer;
|
||||
pub const parseBufferTo = parser.parseBufferTo;
|
||||
pub const StreamParser = parser.StreamParser;
|
||||
pub const Document = parser.Document;
|
||||
pub const Value = parser.Value;
|
||||
|
@@ -1,3 +1,13 @@
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const buffers = @import("./linebuffer.zig");
|
||||
@@ -40,7 +50,7 @@ pub const Options = struct {
|
||||
// If an empty document is parsed, this defines what value type should be the
|
||||
// resulting document root object. The default behavior is to emit an error if the
|
||||
// document is empty.
|
||||
default_object: enum { string, list, map, fail } = .fail,
|
||||
default_object: enum { scalar, list, map, fail } = .fail,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// If false, and a mapping contains additional keys that do not map to the fields of
|
||||
@@ -51,11 +61,11 @@ pub const Options = struct {
|
||||
ignore_extra_fields: bool = true,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// If true, if a struct field is an optional type and the corresponding mapping key
|
||||
// does not exist, the object field will be set to `null`. By default, if the
|
||||
// parsed document is missing a mapping key for a given field, an error will be
|
||||
// raised instead.
|
||||
treat_omitted_as_null: bool = false,
|
||||
// If true, if a struct field has a default value associated with it and the
|
||||
// corresponding mapping key does not exist, the object field will be set to the
|
||||
// default value. By default, this behavior is enabled, allowing succinct
|
||||
// representation of objects that have default fields.
|
||||
allow_omitting_default_values: bool = true,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// If true, strings may be coerced into other scalar types, like booleans or
|
||||
@@ -64,15 +74,39 @@ pub const Options = struct {
|
||||
coerce_strings: bool = false,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// Two lists of strings. Truthy strings will be parsed to boolean true. Falsy
|
||||
// strings will be parsed to boolean false. All other strings will raise an
|
||||
// error.
|
||||
boolean_strings: struct { truthy: []const []const u8, falsy: []const []const u8 } = .{
|
||||
.truthy = &.{ "true", "True", "yes", "on" },
|
||||
.falsy = &.{ "false", "False", "no", "off" },
|
||||
},
|
||||
// Two lists of strings. Scalars in a document that match any of the truthy values
|
||||
// will be parsed to boolean true. Scalars in the document that match any of the
|
||||
// falsy values will be parsed to boolean false. All other scalar values will raise
|
||||
// an error if the destination is a boolean type. By default, these comparisons are
|
||||
// case-sensitive. See the `case_insensitive_scalar_coersion` option to change
|
||||
// this.
|
||||
truthy_boolean_scalars: []const []const u8 = &.{ "true", "True", "yes", "on" },
|
||||
falsy_boolean_scalars: []const []const u8 = &.{ "false", "False", "no", "off" },
|
||||
|
||||
null_strings: []const []const u8 = &.{ "null", "nil", "None" },
|
||||
// Only used by the parseTo family of functions.
|
||||
// A list of strings. Scalars in the document that match any of the values listed
|
||||
// will be parsed to optional `null`. Any other scalar value will be parsed as the
|
||||
// optional child type if the destination type is an optional. By default, these
|
||||
// comparisons are case-sensitive. See the `case_insensitive_scalar_coersion`
|
||||
// option to change this.
|
||||
null_scalars: []const []const u8 = &.{ "null", "nil", "None" },
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// Choose whether to strip the leading `.` off of expected enum values. By default,
|
||||
// `.enum_field` will be parsed into the enum field `enum_field`, which makes them
|
||||
// look like source code enum literals. Any enum value missing the leading `.` will
|
||||
// result in a conversion error. If set to false, no preprocessing will be done
|
||||
// and enum values will be converted from the literal scalar/string. These two styles
|
||||
// cannot be mixed in a single document. Note that this setting also affects how
|
||||
// tagged unions are parsed (specifically, the union's field name must also have the
|
||||
// leading `.` if this option is enabled.)
|
||||
expect_enum_dot: bool = true,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// Perform ASCII-case-insensitive comparisons for scalars (i.e. `TRUE` in a document
|
||||
// will match `true` in the boolean scalars. Unicode case folding is not currently
|
||||
// supported.
|
||||
case_insensitive_scalar_coersion: bool = false,
|
||||
|
||||
// Only used by the parseTo family of functions.
|
||||
// If true, document scalars that appear to be numbers will attempt to convert into
|
||||
@@ -121,6 +155,7 @@ pub fn parseBufferTo(
|
||||
options: Options,
|
||||
) !Parsed(T) {
|
||||
var doc = try parseBuffer(allocator, buffer, diagnostics, options);
|
||||
errdefer doc.deinit();
|
||||
return try doc.convertTo(T, options);
|
||||
}
|
||||
|
||||
|
@@ -1,3 +1,13 @@
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const tokenizer = @import("../tokenizer.zig");
|
||||
@@ -26,6 +36,7 @@ pub const State = struct {
|
||||
document: Document,
|
||||
diagnostics: *Diagnostics,
|
||||
value_stack: Stack,
|
||||
string_builder: std.ArrayListUnmanaged(u8),
|
||||
mode: enum { initial, value, done } = .initial,
|
||||
expect_shift: tokenizer.ShiftDirection = .none,
|
||||
dangling_key: ?[]const u8 = null,
|
||||
@@ -35,6 +46,7 @@ pub const State = struct {
|
||||
.document = Document.init(allocator),
|
||||
.diagnostics = diagnostics,
|
||||
.value_stack = Stack.init(allocator),
|
||||
.string_builder = std.ArrayListUnmanaged(u8){},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -47,7 +59,7 @@ pub const State = struct {
|
||||
|
||||
switch (state.mode) {
|
||||
.initial => switch (options.default_object) {
|
||||
.string => state.document.root = Value.newString(arena_alloc),
|
||||
.scalar => state.document.root = Value.emptyScalar(),
|
||||
.list => state.document.root = Value.newList(arena_alloc),
|
||||
.map => state.document.root = Value.newMap(arena_alloc),
|
||||
.fail => {
|
||||
@@ -57,18 +69,18 @@ pub const State = struct {
|
||||
},
|
||||
},
|
||||
.value => switch (state.value_stack.getLast().*) {
|
||||
// remove the final trailing newline or space
|
||||
.string => |*string| _ = string.popOrNull(),
|
||||
// if we have a dangling -, attach an empty string to it
|
||||
.list => |*list| if (state.expect_shift == .indent) try list.append(Value.newScalar(arena_alloc)),
|
||||
// if we have a dangling "key:", attach an empty string to it
|
||||
// we have an in-progress string, finish it.
|
||||
.string => |*string| string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0),
|
||||
// if we have a dangling -, attach an empty scalar to it
|
||||
.list => |*list| if (state.expect_shift == .indent) try list.append(Value.emptyScalar()),
|
||||
// if we have a dangling "key:", attach an empty scalar to it
|
||||
.map => |*map| if (state.dangling_key) |dk| try state.putMap(
|
||||
map,
|
||||
dk,
|
||||
Value.newScalar(arena_alloc),
|
||||
Value.emptyScalar(),
|
||||
options.duplicate_key_behavior,
|
||||
),
|
||||
.scalar, .flow_list, .flow_map => {},
|
||||
.scalar, .inline_list, .inline_map => {},
|
||||
},
|
||||
.done => {},
|
||||
}
|
||||
@@ -102,18 +114,18 @@ pub const State = struct {
|
||||
state.document.root = try Value.fromScalar(arena_alloc, str);
|
||||
state.mode = .done;
|
||||
},
|
||||
.line_string, .space_string => |str| {
|
||||
state.document.root = try Value.fromString(arena_alloc, str);
|
||||
try state.document.root.string.append(in_line.lineEnding());
|
||||
.line_string, .space_string, .concat_string => |str| {
|
||||
state.document.root = Value.emptyString();
|
||||
try state.string_builder.appendSlice(arena_alloc, str);
|
||||
try state.value_stack.append(&state.document.root);
|
||||
state.mode = .value;
|
||||
},
|
||||
.flow_list => |str| {
|
||||
state.document.root = try state.parseFlow(str, .flow_list, dkb);
|
||||
.inline_list => |str| {
|
||||
state.document.root = try state.parseFlow(str, .inline_list, dkb);
|
||||
state.mode = .done;
|
||||
},
|
||||
.flow_map => |str| {
|
||||
state.document.root = try state.parseFlow(str, .flow_map, dkb);
|
||||
.inline_map => |str| {
|
||||
state.document.root = try state.parseFlow(str, .inline_map, dkb);
|
||||
state.mode = .done;
|
||||
},
|
||||
},
|
||||
@@ -126,9 +138,9 @@ pub const State = struct {
|
||||
switch (value) {
|
||||
.empty => state.expect_shift = .indent,
|
||||
.scalar => |str| try rootlist.append(try Value.fromScalar(arena_alloc, str)),
|
||||
.line_string, .space_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)),
|
||||
.flow_list => |str| try rootlist.append(try state.parseFlow(str, .flow_list, dkb)),
|
||||
.flow_map => |str| try rootlist.append(try state.parseFlow(str, .flow_map, dkb)),
|
||||
.line_string, .space_string, .concat_string => |str| try rootlist.append(try Value.fromString(arena_alloc, str)),
|
||||
.inline_list => |str| try rootlist.append(try state.parseFlow(str, .inline_list, dkb)),
|
||||
.inline_map => |str| try rootlist.append(try state.parseFlow(str, .inline_map, dkb)),
|
||||
}
|
||||
},
|
||||
.map_item => |pair| {
|
||||
@@ -144,32 +156,36 @@ pub const State = struct {
|
||||
state.dangling_key = dupekey;
|
||||
},
|
||||
.scalar => |str| try rootmap.put(dupekey, try Value.fromScalar(arena_alloc, str)),
|
||||
.line_string, .space_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)),
|
||||
.flow_list => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_list, dkb)),
|
||||
.flow_map => |str| try rootmap.put(dupekey, try state.parseFlow(str, .flow_map, dkb)),
|
||||
.line_string, .space_string, .concat_string => |str| try rootmap.put(dupekey, try Value.fromString(arena_alloc, str)),
|
||||
.inline_list => |str| try rootmap.put(dupekey, try state.parseFlow(str, .inline_list, dkb)),
|
||||
.inline_map => |str| try rootmap.put(dupekey, try state.parseFlow(str, .inline_map, dkb)),
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
.value => switch (state.value_stack.getLast().*) {
|
||||
// these three states are never reachable here. flow_list and
|
||||
// flow_map are parsed with a separate state machine. These
|
||||
// these three states are never reachable here. inline_list and
|
||||
// inline_map are parsed with a separate state machine. These
|
||||
// value types can only be present by themselves as the first
|
||||
// line of the document, in which case the document consists
|
||||
// only of that single line: this parser jumps immediately into
|
||||
// the .done state, bypassing the .value state in which this
|
||||
// switch is embedded.
|
||||
.scalar, .flow_list, .flow_map => return error.Fail,
|
||||
.scalar, .inline_list, .inline_map => {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document contains invalid data following a single-line value";
|
||||
return error.Fail;
|
||||
},
|
||||
.string => |*string| {
|
||||
if (line.shift == .indent) {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document contains an invalid indented line in a multiline string";
|
||||
state.diagnostics.message = "the document contains invalid indentation in a multiline string";
|
||||
return error.UnexpectedIndent;
|
||||
}
|
||||
|
||||
if (firstpass and line.shift == .dedent) {
|
||||
// kick off the last trailing space or newline
|
||||
_ = string.pop();
|
||||
// copy the string into the document proper
|
||||
string.* = try state.string_builder.toOwnedSliceSentinel(arena_alloc, 0);
|
||||
|
||||
var dedent_depth = line.shift.dedent;
|
||||
while (dedent_depth > 0) : (dedent_depth -= 1)
|
||||
@@ -182,9 +198,12 @@ pub const State = struct {
|
||||
.comment => unreachable,
|
||||
.in_line => |in_line| switch (in_line) {
|
||||
.empty => unreachable,
|
||||
.line_string, .space_string => |str| {
|
||||
try string.appendSlice(str);
|
||||
try string.append(in_line.lineEnding());
|
||||
inline .line_string, .space_string, .concat_string => |str, tag| {
|
||||
if (comptime tag == .line_string)
|
||||
try state.string_builder.append(arena_alloc, '\n');
|
||||
if (comptime tag == .space_string)
|
||||
try state.string_builder.append(arena_alloc, ' ');
|
||||
try state.string_builder.appendSlice(arena_alloc, str);
|
||||
},
|
||||
else => {
|
||||
state.diagnostics.length = 1;
|
||||
@@ -208,7 +227,7 @@ pub const State = struct {
|
||||
// the first line here creates the state.expect_shift, but the second line
|
||||
// is a valid continuation of the list despite not being indented
|
||||
if (firstpass and (state.expect_shift == .indent and line.shift != .indent))
|
||||
try list.append(Value.newScalar(arena_alloc));
|
||||
try list.append(Value.emptyScalar());
|
||||
|
||||
// Consider:
|
||||
//
|
||||
@@ -242,12 +261,16 @@ pub const State = struct {
|
||||
state.expect_shift = .dedent;
|
||||
switch (in_line) {
|
||||
.empty => unreachable,
|
||||
.scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
|
||||
.flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)),
|
||||
.flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)),
|
||||
.line_string, .space_string => |str| {
|
||||
const new_string = try appendListGetValue(list, try Value.fromString(arena_alloc, str));
|
||||
try new_string.string.append(in_line.lineEnding());
|
||||
.scalar => {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document may not contain a scalar value on its own line";
|
||||
return error.UnexpectedValue;
|
||||
},
|
||||
.inline_list => |str| try list.append(try state.parseFlow(str, .inline_list, dkb)),
|
||||
.inline_map => |str| try list.append(try state.parseFlow(str, .inline_map, dkb)),
|
||||
.line_string, .space_string, .concat_string => |str| {
|
||||
const new_string = try appendListGetValue(list, Value.emptyString());
|
||||
try state.string_builder.appendSlice(arena_alloc, str);
|
||||
try state.value_stack.append(new_string);
|
||||
state.expect_shift = .none;
|
||||
},
|
||||
@@ -259,9 +282,9 @@ pub const State = struct {
|
||||
switch (value) {
|
||||
.empty => state.expect_shift = .indent,
|
||||
.scalar => |str| try list.append(try Value.fromScalar(arena_alloc, str)),
|
||||
.line_string, .space_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
|
||||
.flow_list => |str| try list.append(try state.parseFlow(str, .flow_list, dkb)),
|
||||
.flow_map => |str| try list.append(try state.parseFlow(str, .flow_map, dkb)),
|
||||
.line_string, .space_string, .concat_string => |str| try list.append(try Value.fromString(arena_alloc, str)),
|
||||
.inline_list => |str| try list.append(try state.parseFlow(str, .inline_list, dkb)),
|
||||
.inline_map => |str| try list.append(try state.parseFlow(str, .inline_map, dkb)),
|
||||
}
|
||||
} else if (line.shift == .indent) {
|
||||
if (state.expect_shift != .indent) return error.UnexpectedIndent;
|
||||
@@ -284,7 +307,7 @@ pub const State = struct {
|
||||
|
||||
if (state.expect_shift != .indent or line.shift != .indent) {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document contains an invalid map key in a list";
|
||||
state.diagnostics.message = "the document contains a map item where a list item is expected";
|
||||
return error.UnexpectedValue;
|
||||
}
|
||||
|
||||
@@ -311,7 +334,7 @@ pub const State = struct {
|
||||
state.diagnostics.message = "the document is somehow missing a key (this shouldn't be possible)";
|
||||
return error.Fail;
|
||||
},
|
||||
Value.newScalar(arena_alloc),
|
||||
Value.emptyScalar(),
|
||||
dkb,
|
||||
);
|
||||
state.dangling_key = null;
|
||||
@@ -341,15 +364,19 @@ pub const State = struct {
|
||||
|
||||
switch (in_line) {
|
||||
.empty => unreachable,
|
||||
.scalar => |str| try state.putMap(map, state.dangling_key.?, try Value.fromScalar(arena_alloc, str), dkb),
|
||||
.flow_list => |str| try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .flow_list, dkb), dkb),
|
||||
.flow_map => |str| {
|
||||
try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .flow_map, dkb), dkb);
|
||||
.scalar => {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document may not contain a scalar value on its own line";
|
||||
return error.UnexpectedValue;
|
||||
},
|
||||
.line_string, .space_string => |str| {
|
||||
.inline_list => |str| try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .inline_list, dkb), dkb),
|
||||
.inline_map => |str| {
|
||||
try state.putMap(map, state.dangling_key.?, try state.parseFlow(str, .inline_map, dkb), dkb);
|
||||
},
|
||||
.line_string, .space_string, .concat_string => |str| {
|
||||
// string pushes the stack
|
||||
const new_string = try state.putMapGetValue(map, state.dangling_key.?, try Value.fromString(arena_alloc, str), dkb);
|
||||
try new_string.string.append(in_line.lineEnding());
|
||||
const new_string = try state.putMapGetValue(map, state.dangling_key.?, Value.emptyString(), dkb);
|
||||
try state.string_builder.appendSlice(arena_alloc, str);
|
||||
try state.value_stack.append(new_string);
|
||||
state.expect_shift = .none;
|
||||
},
|
||||
@@ -368,7 +395,7 @@ pub const State = struct {
|
||||
|
||||
if (state.expect_shift != .indent or line.shift != .indent or state.dangling_key == null) {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document contains an invalid list item in a map";
|
||||
state.diagnostics.message = "the document contains a list item where a map item is expected";
|
||||
return error.UnexpectedValue;
|
||||
}
|
||||
|
||||
@@ -388,9 +415,9 @@ pub const State = struct {
|
||||
state.dangling_key = dupekey;
|
||||
},
|
||||
.scalar => |str| try state.putMap(map, dupekey, try Value.fromScalar(arena_alloc, str), dkb),
|
||||
.line_string, .space_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb),
|
||||
.flow_list => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_list, dkb), dkb),
|
||||
.flow_map => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .flow_map, dkb), dkb),
|
||||
.line_string, .space_string, .concat_string => |str| try state.putMap(map, dupekey, try Value.fromString(arena_alloc, str), dkb),
|
||||
.inline_list => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .inline_list, dkb), dkb),
|
||||
.inline_map => |str| try state.putMap(map, dupekey, try state.parseFlow(str, .inline_map, dkb), dkb),
|
||||
}
|
||||
} else if (line.shift == .indent) {
|
||||
if (state.expect_shift != .indent or state.dangling_key == null) {
|
||||
@@ -429,17 +456,17 @@ pub const State = struct {
|
||||
const arena_alloc = state.document.arena.allocator();
|
||||
|
||||
var root: Value = switch (root_type) {
|
||||
.flow_list => Value.newFlowList(arena_alloc),
|
||||
.flow_map => Value.newFlowMap(arena_alloc),
|
||||
.inline_list => Value.newFlowList(arena_alloc),
|
||||
.inline_map => Value.newFlowMap(arena_alloc),
|
||||
else => {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow item was closed too many times";
|
||||
state.diagnostics.message = "the inline map or list was closed too many times";
|
||||
return error.BadState;
|
||||
},
|
||||
};
|
||||
var pstate: FlowParseState = switch (root_type) {
|
||||
.flow_list => .want_list_item,
|
||||
.flow_map => .want_map_key,
|
||||
.inline_list => .want_list_item,
|
||||
.inline_map => .want_map_key,
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
@@ -452,19 +479,19 @@ pub const State = struct {
|
||||
charloop: for (contents, 0..) |char, idx| {
|
||||
switch (pstate) {
|
||||
.want_list_item => switch (char) {
|
||||
' ', '\t' => continue :charloop,
|
||||
' ' => continue :charloop,
|
||||
'\t' => return error.IllegalTabWhitespaceInLine,
|
||||
',' => {
|
||||
// empty value
|
||||
// don't check for whitespace here: [ , ] is okay, as is [ , , ]
|
||||
const tip = try state.getStackTip();
|
||||
try tip.flow_list.append(Value.newScalar(arena_alloc));
|
||||
try tip.inline_list.append(Value.emptyScalar());
|
||||
item_start = idx + 1;
|
||||
},
|
||||
'{' => {
|
||||
const tip = try state.getStackTip();
|
||||
|
||||
const new_map = try appendListGetValue(
|
||||
&tip.flow_list,
|
||||
&tip.inline_list,
|
||||
Value.newFlowMap(arena_alloc),
|
||||
);
|
||||
|
||||
@@ -476,7 +503,7 @@ pub const State = struct {
|
||||
const tip = try state.getStackTip();
|
||||
|
||||
const new_list = try appendListGetValue(
|
||||
&tip.flow_list,
|
||||
&tip.inline_list,
|
||||
Value.newFlowList(arena_alloc),
|
||||
);
|
||||
|
||||
@@ -487,11 +514,11 @@ pub const State = struct {
|
||||
']' => {
|
||||
const finished = state.value_stack.getLastOrNull() orelse {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow list was closed too many times";
|
||||
state.diagnostics.message = "the inline list was closed too many times";
|
||||
return error.BadState;
|
||||
};
|
||||
if (finished.flow_list.items.len > 0 or idx > item_start)
|
||||
try finished.flow_list.append(Value.newScalar(arena_alloc));
|
||||
if (finished.inline_list.items.len > 0 or idx > item_start)
|
||||
try finished.inline_list.append(Value.emptyScalar());
|
||||
pstate = try state.popFlowStack();
|
||||
},
|
||||
else => {
|
||||
@@ -500,42 +527,40 @@ pub const State = struct {
|
||||
},
|
||||
},
|
||||
.consuming_list_item => switch (char) {
|
||||
// consider: detecting trailing whitespace. "[ 1 ]" should
|
||||
// produce "1" and not "1 " as it currently does, which breaks
|
||||
// the principle of least astonishment. design: no trailing
|
||||
// whitespace before "," and only a single space is allowed before "]"
|
||||
',' => {
|
||||
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow list contains whitespace before ,";
|
||||
return error.TrailingWhitespace;
|
||||
}
|
||||
const end = end: {
|
||||
var countup = @max(idx, 1) - 1;
|
||||
while (countup > 0) : (countup -= 1) {
|
||||
if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine;
|
||||
if (contents[countup] != ' ') break :end countup + 1;
|
||||
}
|
||||
break :end countup;
|
||||
};
|
||||
|
||||
const tip = try state.getStackTip();
|
||||
try tip.flow_list.append(
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..idx]),
|
||||
try tip.inline_list.append(
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..end]),
|
||||
);
|
||||
item_start = idx + 1;
|
||||
|
||||
pstate = .want_list_item;
|
||||
},
|
||||
']' => {
|
||||
var end = idx;
|
||||
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
|
||||
if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow list contains extra whitespace before ]";
|
||||
return error.TrailingWhitespace;
|
||||
const end = end: {
|
||||
var countup = @max(idx, 1) - 1;
|
||||
while (countup > 0) : (countup -= 1) {
|
||||
if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine;
|
||||
if (contents[countup] != ' ') break :end countup + 1;
|
||||
}
|
||||
end = idx - 1;
|
||||
}
|
||||
break :end countup;
|
||||
};
|
||||
|
||||
const finished = state.value_stack.getLastOrNull() orelse {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow list was closed too many times";
|
||||
state.diagnostics.message = "the inline list was closed too many times";
|
||||
return error.BadState;
|
||||
};
|
||||
try finished.flow_list.append(
|
||||
try finished.inline_list.append(
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..end]),
|
||||
);
|
||||
pstate = try state.popFlowStack();
|
||||
@@ -543,7 +568,8 @@ pub const State = struct {
|
||||
else => continue :charloop,
|
||||
},
|
||||
.want_list_separator => switch (char) {
|
||||
' ', '\t' => continue :charloop,
|
||||
' ' => continue :charloop,
|
||||
'\t' => return error.IllegalTabWhitespaceInLine,
|
||||
',' => {
|
||||
item_start = idx;
|
||||
pstate = .want_list_item;
|
||||
@@ -551,18 +577,24 @@ pub const State = struct {
|
||||
']' => pstate = try state.popFlowStack(),
|
||||
else => return {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the document contains an invalid flow list separator";
|
||||
state.diagnostics.message = "the document contains an invalid inline list separator";
|
||||
return error.BadToken;
|
||||
},
|
||||
},
|
||||
.want_map_key => switch (char) {
|
||||
' ', '\t' => continue :charloop,
|
||||
// forbid these characters so that flow dictionary keys cannot start
|
||||
' ' => continue :charloop,
|
||||
'\t' => return error.IllegalTabWhitespaceInLine,
|
||||
// forbid these characters so that inline dictionary keys cannot start
|
||||
// with characters that regular dictionary keys cannot start with
|
||||
// (even though they're unambiguous in this specific context).
|
||||
'{', '[', '#', '-', '>', '|', ',' => return {
|
||||
'{', '[', '#', ',' => return {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "this document contains a flow map key that starts with an invalid character";
|
||||
state.diagnostics.message = "this document contains a inline map key that starts with an invalid character";
|
||||
return error.BadToken;
|
||||
},
|
||||
'-', '>', '+', '|' => if ((idx + 1) < contents.len and contents[idx + 1] == ' ') {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "this document contains a inline map key that starts with an invalid sequence";
|
||||
return error.BadToken;
|
||||
},
|
||||
':' => {
|
||||
@@ -578,24 +610,28 @@ pub const State = struct {
|
||||
},
|
||||
.consuming_map_key => switch (char) {
|
||||
':' => {
|
||||
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow map contains whitespace before :";
|
||||
return error.TrailingWhitespace;
|
||||
}
|
||||
dangling_key = try arena_alloc.dupe(u8, contents[item_start..idx]);
|
||||
const end = end: {
|
||||
var countup = @max(idx, 1) - 1;
|
||||
while (countup > 0) : (countup -= 1) {
|
||||
if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine;
|
||||
if (contents[countup] != ' ') break :end countup + 1;
|
||||
}
|
||||
break :end countup;
|
||||
};
|
||||
dangling_key = try arena_alloc.dupe(u8, contents[item_start..end]);
|
||||
pstate = .want_map_value;
|
||||
},
|
||||
else => continue :charloop,
|
||||
},
|
||||
.want_map_value => switch (char) {
|
||||
' ', '\t' => continue :charloop,
|
||||
' ' => continue :charloop,
|
||||
'\t' => return error.IllegalTabWhitespaceInLine,
|
||||
',' => {
|
||||
const tip = try state.getStackTip();
|
||||
try state.putMap(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
Value.newScalar(arena_alloc),
|
||||
Value.emptyScalar(),
|
||||
dkb,
|
||||
);
|
||||
|
||||
@@ -606,7 +642,7 @@ pub const State = struct {
|
||||
const tip = try state.getStackTip();
|
||||
|
||||
const new_list = try state.putMapGetValue(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
Value.newFlowList(arena_alloc),
|
||||
dkb,
|
||||
@@ -621,7 +657,7 @@ pub const State = struct {
|
||||
const tip = try state.getStackTip();
|
||||
|
||||
const new_map = try state.putMapGetValue(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
Value.newFlowMap(arena_alloc),
|
||||
dkb,
|
||||
@@ -635,9 +671,9 @@ pub const State = struct {
|
||||
// the value is an empty string and this map is closed
|
||||
const tip = try state.getStackTip();
|
||||
try state.putMap(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
Value.newScalar(arena_alloc),
|
||||
Value.emptyScalar(),
|
||||
dkb,
|
||||
);
|
||||
|
||||
@@ -651,35 +687,38 @@ pub const State = struct {
|
||||
},
|
||||
.consuming_map_value => switch (char) {
|
||||
',' => {
|
||||
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow map contains whitespace before ,";
|
||||
return error.TrailingWhitespace;
|
||||
}
|
||||
const end = end: {
|
||||
var countup = @max(idx, 1) - 1;
|
||||
while (countup > 0) : (countup -= 1) {
|
||||
if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine;
|
||||
if (contents[countup] != ' ') break :end countup + 1;
|
||||
}
|
||||
break :end countup;
|
||||
};
|
||||
|
||||
const tip = try state.getStackTip();
|
||||
try state.putMap(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..idx]),
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..end]),
|
||||
dkb,
|
||||
);
|
||||
dangling_key = null;
|
||||
pstate = .want_map_key;
|
||||
},
|
||||
'}' => {
|
||||
var end = idx;
|
||||
if (contents[idx - 1] == ' ' or contents[idx - 1] == '\t') {
|
||||
if (idx > 1 and (contents[idx - 2] == ' ' or contents[idx - 2] == '\t')) {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "the flow map contains extra whitespace before }";
|
||||
return error.TrailingWhitespace;
|
||||
const end = end: {
|
||||
var countup = @max(idx, 1) - 1;
|
||||
while (countup > 0) : (countup -= 1) {
|
||||
if (contents[countup] == '\t') return error.IllegalTabWhitespaceInLine;
|
||||
if (contents[countup] != ' ') break :end countup + 1;
|
||||
}
|
||||
end = idx - 1;
|
||||
}
|
||||
break :end countup;
|
||||
};
|
||||
|
||||
const tip = try state.getStackTip();
|
||||
try state.putMap(
|
||||
&tip.flow_map,
|
||||
&tip.inline_map,
|
||||
dangling_key.?,
|
||||
try Value.fromScalar(arena_alloc, contents[item_start..end]),
|
||||
dkb,
|
||||
@@ -690,12 +729,13 @@ pub const State = struct {
|
||||
else => continue :charloop,
|
||||
},
|
||||
.want_map_separator => switch (char) {
|
||||
' ', '\t' => continue :charloop,
|
||||
' ' => continue :charloop,
|
||||
'\t' => return error.IllegalTabWhitespaceInLine,
|
||||
',' => pstate = .want_map_key,
|
||||
'}' => pstate = try state.popFlowStack(),
|
||||
else => return {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "this document contains an invalid character instead of a flow map separator";
|
||||
state.diagnostics.message = "this document contains an invalid character instead of a inline map separator";
|
||||
return error.BadToken;
|
||||
},
|
||||
},
|
||||
@@ -711,7 +751,7 @@ pub const State = struct {
|
||||
// we ran out of characters while still in the middle of an object
|
||||
if (pstate != .done) return {
|
||||
state.diagnostics.length = 1;
|
||||
state.diagnostics.message = "this document contains an unterminated flow item";
|
||||
state.diagnostics.message = "this document contains an unterminated inline map or list";
|
||||
return error.BadState;
|
||||
};
|
||||
|
||||
@@ -736,8 +776,8 @@ pub const State = struct {
|
||||
const parent = state.value_stack.getLastOrNull() orelse return .done;
|
||||
|
||||
return switch (parent.*) {
|
||||
.flow_list => .want_list_separator,
|
||||
.flow_map => .want_map_separator,
|
||||
.inline_list => .want_list_separator,
|
||||
.inline_map => .want_map_separator,
|
||||
else => .done,
|
||||
};
|
||||
}
|
||||
@@ -762,7 +802,10 @@ pub const State = struct {
|
||||
return error.DuplicateKey;
|
||||
},
|
||||
.use_first => {},
|
||||
.use_last => gop.value_ptr.* = value,
|
||||
.use_last => {
|
||||
_ = map.orderedRemove(key);
|
||||
map.putAssumeCapacityNoClobber(key, value);
|
||||
},
|
||||
}
|
||||
else
|
||||
gop.value_ptr.* = value;
|
||||
|
@@ -1,4 +1,19 @@
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
const hasFn = if (@hasDecl(std.meta, "trait")) struct {
|
||||
fn hasFn(comptime T: type, comptime name: []const u8) bool {
|
||||
return std.meta.trait.hasFn(name)(T);
|
||||
}
|
||||
}.hasFn else std.meta.hasFn;
|
||||
|
||||
const Options = @import("../parser.zig").Options;
|
||||
|
||||
@@ -41,7 +56,7 @@ pub fn Parsed(comptime T: type) type {
|
||||
}
|
||||
|
||||
pub const Value = union(enum) {
|
||||
pub const String = std.ArrayList(u8);
|
||||
pub const String = [:0]const u8;
|
||||
pub const Map = std.StringArrayHashMap(Value);
|
||||
pub const List = std.ArrayList(Value);
|
||||
pub const TagType = @typeInfo(Value).Union.tag_type.?;
|
||||
@@ -49,16 +64,20 @@ pub const Value = union(enum) {
|
||||
scalar: String,
|
||||
string: String,
|
||||
list: List,
|
||||
flow_list: List,
|
||||
inline_list: List,
|
||||
map: Map,
|
||||
flow_map: Map,
|
||||
inline_map: Map,
|
||||
|
||||
pub fn FieldConverter(comptime T: type) type {
|
||||
return *const fn (Value, std.mem.Allocator, Options) error{BadValue}!T;
|
||||
}
|
||||
|
||||
pub fn convertTo(self: Value, comptime T: type, allocator: std.mem.Allocator, options: Options) !T {
|
||||
switch (@typeInfo(T)) {
|
||||
.Void => {
|
||||
switch (self) {
|
||||
.scalar => |str| return if (str.items.len == 0) void{} else error.BadValue,
|
||||
.string => |str| return if (options.coerce_strings and str.items.len == 0) void{} else error.BadValue,
|
||||
.scalar => |str| return if (str.len == 0) void{} else error.BadValue,
|
||||
.string => |str| return if (options.coerce_strings and str.len == 0) void{} else error.BadValue,
|
||||
else => return error.BadValue,
|
||||
}
|
||||
},
|
||||
@@ -66,10 +85,17 @@ pub const Value = union(enum) {
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
for (options.boolean_strings.truthy) |check|
|
||||
if (std.mem.eql(u8, str.items, check)) return true;
|
||||
for (options.boolean_strings.falsy) |check|
|
||||
if (std.mem.eql(u8, str.items, check)) return false;
|
||||
if (options.case_insensitive_scalar_coersion) {
|
||||
for (options.truthy_boolean_scalars) |check|
|
||||
if (std.ascii.eqlIgnoreCase(str, check)) return true;
|
||||
for (options.falsy_boolean_scalars) |check|
|
||||
if (std.ascii.eqlIgnoreCase(str, check)) return false;
|
||||
} else {
|
||||
for (options.truthy_boolean_scalars) |check|
|
||||
if (std.mem.eql(u8, str, check)) return true;
|
||||
for (options.falsy_boolean_scalars) |check|
|
||||
if (std.mem.eql(u8, str, check)) return false;
|
||||
}
|
||||
|
||||
return error.BadValue;
|
||||
},
|
||||
@@ -80,8 +106,7 @@ pub const Value = union(enum) {
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
std.debug.print("'{s}'\n", .{str.items});
|
||||
return try std.fmt.parseInt(T, str.items, 0);
|
||||
return try std.fmt.parseInt(T, str, 0);
|
||||
},
|
||||
else => return error.BadValue,
|
||||
}
|
||||
@@ -90,7 +115,7 @@ pub const Value = union(enum) {
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
return try std.fmt.parseFloat(T, str.items, 0);
|
||||
return try std.fmt.parseFloat(T, str);
|
||||
},
|
||||
else => return error.BadValue,
|
||||
}
|
||||
@@ -102,16 +127,32 @@ pub const Value = union(enum) {
|
||||
// type to use for this? the problem is that it becomes
|
||||
// invasive into downstream code. Ultimately this should
|
||||
// probably be solved in the zig stdlib or similar.
|
||||
// TODO: This also doesn't handle sentinels properly.
|
||||
switch (self) {
|
||||
.scalar, .string => |str| return if (ptr.child == u8) str.items else error.BadValue,
|
||||
.list, .flow_list => |lst| {
|
||||
var result = try std.ArrayList(ptr.child).initCapacity(allocator, lst.items.len);
|
||||
errdefer result.deinit();
|
||||
for (lst.items) |item| {
|
||||
result.appendAssumeCapacity(try item.convertTo(ptr.child, allocator, options));
|
||||
.scalar, .string => |str| {
|
||||
if (comptime ptr.child == u8) {
|
||||
if (comptime ptr.sentinel) |sentinel|
|
||||
if (comptime @as(*align(1) const ptr.child, @ptrCast(sentinel)).* != 0)
|
||||
return error.BadValue;
|
||||
|
||||
return str;
|
||||
} else {
|
||||
return error.BadValue;
|
||||
}
|
||||
},
|
||||
.list, .inline_list => |lst| {
|
||||
const result = try allocator.alloc(ptr.child, lst.items.len + @intFromBool(ptr.sentinel != null));
|
||||
|
||||
for (result[0..lst.items.len], lst.items) |*res, item| {
|
||||
res.* = try item.convertTo(ptr.child, allocator, options);
|
||||
}
|
||||
|
||||
if (comptime ptr.sentinel) |sentinel| {
|
||||
const sval = @as(*align(1) const ptr.child, @ptrCast(sentinel)).*;
|
||||
result[lst.items.len] = sval;
|
||||
return result[0..lst.items.len :sval];
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
return result.toOwnedSlice();
|
||||
},
|
||||
else => return error.BadValue,
|
||||
}
|
||||
@@ -122,7 +163,7 @@ pub const Value = union(enum) {
|
||||
result.* = try self.convertTo(ptr.child, allocator, options);
|
||||
return result;
|
||||
},
|
||||
else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)), // do not support many or C item pointers.
|
||||
else => @compileError("Cannot deserialize into many-pointer or c-pointer " ++ @typeName(T)),
|
||||
},
|
||||
.Array => |arr| {
|
||||
// TODO: There is ambiguity here because a document expecting a list
|
||||
@@ -130,40 +171,37 @@ pub const Value = union(enum) {
|
||||
// type to use for this? the problem is that it becomes
|
||||
// invasive into downstream code. Ultimately this should
|
||||
// probably be solved in the zig stdlib or similar.
|
||||
// TODO: This also doesn't handle sentinels properly.
|
||||
switch (self) {
|
||||
.scalar, .string => |str| {
|
||||
if (arr.child == u8 and str.items.len == arr.len) {
|
||||
if (arr.child == u8 and str.len == arr.len) {
|
||||
var result: T = undefined;
|
||||
@memcpy(&result, str.items);
|
||||
@memcpy(&result, str);
|
||||
return result;
|
||||
} else return error.BadValue;
|
||||
},
|
||||
.list, .flow_list => |lst| {
|
||||
var storage = try std.ArrayList(arr.child).initCapacity(allocator, arr.len);
|
||||
defer storage.deinit();
|
||||
for (lst.items) |item| {
|
||||
storage.appendAssumeCapacity(try item.convertTo(arr.child, allocator, options));
|
||||
}
|
||||
// this may result in a big stack allocation, which is not ideal
|
||||
.list, .inline_list => |lst| {
|
||||
if (lst.items.len != arr.len) return error.BadValue;
|
||||
|
||||
var result: T = undefined;
|
||||
@memcpy(&result, storage.items);
|
||||
for (&result, lst.items) |*res, item| {
|
||||
res.* = try item.convertTo(arr.child, allocator, options);
|
||||
}
|
||||
return result;
|
||||
},
|
||||
else => return error.BadValue,
|
||||
}
|
||||
},
|
||||
.Struct => |stt| {
|
||||
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
|
||||
if (comptime hasFn(T, "deserializeNice"))
|
||||
return T.deserializeNice(self, allocator, options);
|
||||
|
||||
if (stt.is_tuple) {
|
||||
switch (self) {
|
||||
.list, .flow_list => |list| {
|
||||
.list, .inline_list => |list| {
|
||||
if (list.items.len != stt.fields.len) return error.BadValue;
|
||||
var result: T = undefined;
|
||||
inline for (stt.fields, 0..) |field, idx| {
|
||||
result[idx] = try list.items[idx].convertTo(field.type, allocator, options);
|
||||
inline for (stt.fields, &result, list.items) |field, *res, item| {
|
||||
res.* = try item.convertTo(field.type, allocator, options);
|
||||
}
|
||||
return result;
|
||||
},
|
||||
@@ -172,37 +210,31 @@ pub const Value = union(enum) {
|
||||
}
|
||||
|
||||
switch (self) {
|
||||
.map, .flow_map => |map| {
|
||||
.map, .inline_map => |map| {
|
||||
var result: T = undefined;
|
||||
|
||||
if (options.ignore_extra_fields) {
|
||||
inline for (stt.fields) |field| {
|
||||
if (map.get(field.name)) |value| {
|
||||
@field(result, field.name) = try value.convertTo(field.type, allocator, options);
|
||||
} else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
|
||||
@field(result, field.name) = null;
|
||||
if (!options.ignore_extra_fields and (map.count() > stt.fields.len))
|
||||
return error.BadValue;
|
||||
|
||||
var use_count: usize = 0;
|
||||
inline for (stt.fields) |field| {
|
||||
if (map.get(field.name)) |val| {
|
||||
if (comptime hasFn(T, "niceFieldConverter") and T.niceFieldConverter(field.name) != null) {
|
||||
@field(result, field.name) = try T.niceFieldConverter(field.name).?(val, allocator, options);
|
||||
} else {
|
||||
std.debug.print("{s}\n", .{field.name});
|
||||
return error.BadValue;
|
||||
@field(result, field.name) = try val.convertTo(field.type, allocator, options);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// we could iterate over each map key and do an exhaustive
|
||||
// comparison with each struct field name. This would save
|
||||
// memory and it would probably be a fair amount faster for
|
||||
// small structs.
|
||||
var clone = try map.clone();
|
||||
defer clone.deinit();
|
||||
inline for (stt.fields) |field| {
|
||||
if (clone.fetchSwapRemove(field.name)) |kv| {
|
||||
@field(result, field.name) = try kv.value.convertTo(field.type, allocator, options);
|
||||
} else if (options.treat_omitted_as_null and @typeInfo(field.type) == .Optional) {
|
||||
@field(result, field.name) = null;
|
||||
} else return error.BadValue;
|
||||
}
|
||||
// there were extra fields in the data
|
||||
if (clone.count() > 0) return error.BadValue;
|
||||
use_count += 1;
|
||||
} else if (options.allow_omitting_default_values) {
|
||||
if (comptime field.default_value) |def|
|
||||
@field(result, field.name) = @as(*align(1) const field.type, @ptrCast(def)).*
|
||||
else
|
||||
return error.BadValue;
|
||||
} else return error.BadValue;
|
||||
}
|
||||
// there were extra fields in the data
|
||||
if (!options.ignore_extra_fields and (map.count() > use_count))
|
||||
return error.BadValue;
|
||||
|
||||
return result;
|
||||
},
|
||||
@@ -210,15 +242,22 @@ pub const Value = union(enum) {
|
||||
}
|
||||
},
|
||||
.Enum => {
|
||||
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
|
||||
if (comptime hasFn(T, "deserializeNice"))
|
||||
return T.deserializeNice(self, allocator, options);
|
||||
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
if (std.meta.stringToEnum(T, str.items)) |value| return value;
|
||||
const name = if (options.expect_enum_dot) blk: {
|
||||
if (str.len > 0 and str[0] == '.')
|
||||
break :blk str[1..]
|
||||
else
|
||||
return error.BadValue;
|
||||
} else str;
|
||||
|
||||
if (std.meta.stringToEnum(T, name)) |value| return value;
|
||||
if (options.allow_numeric_enums) {
|
||||
const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str.items, 10) catch
|
||||
const parsed = std.fmt.parseInt(@typeInfo(T).Enum.tag_type, str, 10) catch
|
||||
return error.BadValue;
|
||||
return std.meta.intToEnum(T, parsed) catch error.BadValue;
|
||||
}
|
||||
@@ -228,25 +267,52 @@ pub const Value = union(enum) {
|
||||
}
|
||||
},
|
||||
.Union => |unn| {
|
||||
if (comptime std.meta.trait.hasFn("deserializeNice")(T))
|
||||
if (comptime hasFn(T, "deserializeNice"))
|
||||
return T.deserializeNice(self, allocator, options);
|
||||
|
||||
if (unn.tag_type == null) @compileError("Cannot deserialize into untagged union " ++ @typeName(T));
|
||||
|
||||
switch (self) {
|
||||
.map, .flow_map => |map| {
|
||||
// a union may not ever be deserialized from a map with more than one value
|
||||
.map, .inline_map => |map| {
|
||||
// a union may not ever be deserialized from a map with more
|
||||
// (or less) than one value
|
||||
if (map.count() != 1) return error.BadValue;
|
||||
const key = map.keys()[0];
|
||||
const name = if (options.expect_enum_dot) blk: {
|
||||
if (key.len > 0 and key[0] == '.')
|
||||
break :blk key[1..]
|
||||
else
|
||||
return error.BadValue;
|
||||
} else key;
|
||||
|
||||
inline for (unn.fields) |field| {
|
||||
if (std.mem.eql(u8, key, field.name))
|
||||
if (std.mem.eql(u8, name, field.name))
|
||||
return @unionInit(T, field.name, try map.get(key).?.convertTo(field.type, allocator, options));
|
||||
}
|
||||
return error.BadValue;
|
||||
},
|
||||
// TODO: if the field is a 0 width type like void, we could parse it
|
||||
// directly from a scalar/string value (i.e. a name with no
|
||||
// corresponding value)
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
const name = if (options.expect_enum_dot) blk: {
|
||||
if (str.len > 0 and str[0] == '.')
|
||||
break :blk str[1..]
|
||||
else
|
||||
return error.BadValue;
|
||||
} else str;
|
||||
|
||||
inline for (unn.fields) |field| {
|
||||
if (@sizeOf(field.type) != 0) continue;
|
||||
// this logic may be a little off: comtime_int,
|
||||
// comptime_float, and type will all have size 0 because
|
||||
// they can't be used at runtime. On the other hand, trying
|
||||
// to use them here should result in a compile error? Also,
|
||||
// it's a 0 sized type so initializing it as undefined
|
||||
// shouldn't be a problem. As far as I know.
|
||||
if (std.mem.eql(u8, name, field.name))
|
||||
return @unionInit(T, field.name, undefined);
|
||||
}
|
||||
return error.BadValue;
|
||||
},
|
||||
else => return error.BadValue,
|
||||
}
|
||||
},
|
||||
@@ -254,8 +320,13 @@ pub const Value = union(enum) {
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| {
|
||||
if (tag == .string and !options.coerce_strings) return error.BadValue;
|
||||
for (options.null_strings) |check|
|
||||
if (std.mem.eql(u8, str.items, check)) return null;
|
||||
if (options.case_insensitive_scalar_coersion) {
|
||||
for (options.null_scalars) |check|
|
||||
if (std.ascii.eqlIgnoreCase(str, check)) return null;
|
||||
} else {
|
||||
for (options.null_scalars) |check|
|
||||
if (std.mem.eql(u8, str, check)) return null;
|
||||
}
|
||||
|
||||
return try self.convertTo(opt.child, allocator, options);
|
||||
},
|
||||
@@ -275,17 +346,15 @@ pub const Value = union(enum) {
|
||||
}
|
||||
|
||||
inline fn _fromScalarOrString(alloc: std.mem.Allocator, comptime classification: TagType, input: []const u8) !Value {
|
||||
var res = @unionInit(Value, @tagName(classification), try String.initCapacity(alloc, input.len));
|
||||
@field(res, @tagName(classification)).appendSliceAssumeCapacity(input);
|
||||
return res;
|
||||
return @unionInit(Value, @tagName(classification), try alloc.dupeZ(u8, input));
|
||||
}
|
||||
|
||||
pub inline fn newScalar(alloc: std.mem.Allocator) Value {
|
||||
return .{ .scalar = String.init(alloc) };
|
||||
pub inline fn emptyScalar() Value {
|
||||
return .{ .scalar = "" };
|
||||
}
|
||||
|
||||
pub inline fn newString(alloc: std.mem.Allocator) Value {
|
||||
return .{ .string = String.init(alloc) };
|
||||
pub inline fn emptyString() Value {
|
||||
return .{ .string = "" };
|
||||
}
|
||||
|
||||
pub inline fn newList(alloc: std.mem.Allocator) Value {
|
||||
@@ -293,7 +362,7 @@ pub const Value = union(enum) {
|
||||
}
|
||||
|
||||
pub inline fn newFlowList(alloc: std.mem.Allocator) Value {
|
||||
return .{ .flow_list = List.init(alloc) };
|
||||
return .{ .inline_list = List.init(alloc) };
|
||||
}
|
||||
|
||||
pub inline fn newMap(alloc: std.mem.Allocator) Value {
|
||||
@@ -301,21 +370,21 @@ pub const Value = union(enum) {
|
||||
}
|
||||
|
||||
pub inline fn newFlowMap(alloc: std.mem.Allocator) Value {
|
||||
return .{ .flow_map = Map.init(alloc) };
|
||||
return .{ .inline_map = Map.init(alloc) };
|
||||
}
|
||||
|
||||
pub fn recursiveEqualsExact(self: Value, other: Value) bool {
|
||||
if (@as(TagType, self) != other) return false;
|
||||
switch (self) {
|
||||
inline .scalar, .string => |str, tag| return std.mem.eql(u8, str.items, @field(other, @tagName(tag)).items),
|
||||
inline .list, .flow_list => |lst, tag| {
|
||||
inline .scalar, .string => |str, tag| return std.mem.eql(u8, str, @field(other, @tagName(tag))),
|
||||
inline .list, .inline_list => |lst, tag| {
|
||||
const olst = @field(other, @tagName(tag));
|
||||
|
||||
if (lst.items.len != olst.items.len) return false;
|
||||
for (lst.items, olst.items) |this, that| if (!this.recursiveEqualsExact(that)) return false;
|
||||
return true;
|
||||
},
|
||||
inline .map, .flow_map => |map, tag| {
|
||||
inline .map, .inline_map => |map, tag| {
|
||||
const omap = @field(other, @tagName(tag));
|
||||
|
||||
if (map.count() != omap.count()) return false;
|
||||
@@ -341,8 +410,8 @@ pub const Value = union(enum) {
|
||||
fn printRecursive(self: Value, indent: usize) void {
|
||||
switch (self) {
|
||||
.scalar, .string => |str| {
|
||||
if (std.mem.indexOfScalar(u8, str.items, '\n')) |_| {
|
||||
var lines = std.mem.splitScalar(u8, str.items, '\n');
|
||||
if (std.mem.indexOfScalar(u8, str, '\n')) |_| {
|
||||
var lines = std.mem.splitScalar(u8, str, '\n');
|
||||
std.debug.print("\n", .{});
|
||||
while (lines.next()) |line| {
|
||||
std.debug.print(
|
||||
@@ -356,10 +425,10 @@ pub const Value = union(enum) {
|
||||
);
|
||||
}
|
||||
} else {
|
||||
std.debug.print("{s}", .{str.items});
|
||||
std.debug.print("{s}", .{str});
|
||||
}
|
||||
},
|
||||
.list, .flow_list => |list| {
|
||||
.list, .inline_list => |list| {
|
||||
if (list.items.len == 0) {
|
||||
std.debug.print("[]", .{});
|
||||
return;
|
||||
@@ -376,7 +445,7 @@ pub const Value = union(enum) {
|
||||
.{ .empty = "", .indent = indent },
|
||||
);
|
||||
},
|
||||
.map, .flow_map => |map| {
|
||||
.map, .inline_map => |map| {
|
||||
if (map.count() == 0) {
|
||||
std.debug.print("{{}}", .{});
|
||||
return;
|
||||
|
@@ -1,3 +1,13 @@
|
||||
// Copyright 2023 torque@epicyclic.dev
|
||||
//
|
||||
// Licensed under the MIT/Expat license. You may not use this file except in
|
||||
// compliance with the license. You may obtain a copy of the license at
|
||||
//
|
||||
// https://spdx.org/licenses/MIT.html
|
||||
//
|
||||
// This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Diagnostics = @import("./parser.zig").Diagnostics;
|
||||
@@ -9,6 +19,7 @@ pub const Error = error{
|
||||
TooMuchIndentation,
|
||||
UnquantizedIndentation,
|
||||
TrailingWhitespace,
|
||||
IllegalTabWhitespaceInLine,
|
||||
Impossible,
|
||||
};
|
||||
|
||||
@@ -23,17 +34,10 @@ pub const InlineItem = union(enum) {
|
||||
scalar: []const u8,
|
||||
line_string: []const u8,
|
||||
space_string: []const u8,
|
||||
concat_string: []const u8,
|
||||
|
||||
flow_list: []const u8,
|
||||
flow_map: []const u8,
|
||||
|
||||
pub fn lineEnding(self: InlineItem) u8 {
|
||||
return switch (self) {
|
||||
.line_string => '\n',
|
||||
.space_string => ' ',
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
inline_list: []const u8,
|
||||
inline_map: []const u8,
|
||||
};
|
||||
|
||||
pub const LineContents = union(enum) {
|
||||
@@ -169,96 +173,113 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
||||
// this should not be possible, as empty lines are caught earlier.
|
||||
if (line.len == 0) return error.Impossible;
|
||||
|
||||
switch (line[0]) {
|
||||
'#' => {
|
||||
// force comments to be followed by a space. This makes them
|
||||
// behave the same way as strings, actually.
|
||||
if (line.len > 1 and line[1] != ' ') {
|
||||
self.buffer.diag().line_offset += 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the start of comment character '#'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
// simply lie about indentation when the line is a comment.
|
||||
quantized = self.last_indent;
|
||||
return .{
|
||||
.shift = .none,
|
||||
.contents = .{ .comment = line[1..] },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
'|', '>', '[', '{' => {
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .in_line = try self.detectInlineItem(line) },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
'-' => {
|
||||
if (line.len > 1 and line[1] != ' ') {
|
||||
self.buffer.diag().line_offset += 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the list entry character '-'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
// blindly add 2 here because an empty item cannot fail in
|
||||
// the value, only if a bogus dedent has occurred
|
||||
self.buffer.diag().line_offset += 2;
|
||||
|
||||
return if (line.len == 1) .{
|
||||
.shift = shift,
|
||||
.contents = .{ .list_item = .empty },
|
||||
.raw = line,
|
||||
} else .{
|
||||
.shift = shift,
|
||||
.contents = .{ .list_item = try self.detectInlineItem(line[2..]) },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
else => {
|
||||
for (line, 0..) |char, idx| {
|
||||
if (char == ':') {
|
||||
self.buffer.diag().line_offset += idx + 2;
|
||||
|
||||
if (idx + 1 == line.len) return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
||||
.raw = line,
|
||||
};
|
||||
|
||||
if (line[idx + 1] != ' ') {
|
||||
self.buffer.diag().line_offset += idx + 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the map key-value separator character ':'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .map_item = .{
|
||||
.key = line[0..idx],
|
||||
.val = try self.detectInlineItem(line[idx + 2 ..]),
|
||||
} },
|
||||
.raw = line,
|
||||
};
|
||||
sigil: {
|
||||
switch (line[0]) {
|
||||
'#' => {
|
||||
// Force comments to be followed by a space. We could
|
||||
// allow #: to be interpreted as a map key, but I'm going
|
||||
// to specifically forbid it instead.
|
||||
if (line.len > 1 and line[1] != ' ') {
|
||||
self.buffer.diag().line_offset += 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the start of comment character '#'";
|
||||
return error.BadToken;
|
||||
}
|
||||
}
|
||||
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .in_line = .{ .scalar = line } },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
// simply lie about indentation when the line is a comment.
|
||||
quantized = self.last_indent;
|
||||
return .{
|
||||
.shift = .none,
|
||||
.contents = .{ .comment = line[1..] },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
'|', '>', '+' => {
|
||||
if (line.len > 1 and line[1] != ' ') {
|
||||
// we want to try parsing this as a map key
|
||||
break :sigil;
|
||||
}
|
||||
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .in_line = try self.detectInlineItem(line) },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
'[', '{' => {
|
||||
// these don't require being followed by a space, so they
|
||||
// cannot be interpreted as starting a map key in any way.
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .in_line = try self.detectInlineItem(line) },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
'-' => {
|
||||
if (line.len > 1 and line[1] != ' ') {
|
||||
// we want to try parsing this as a map key
|
||||
break :sigil;
|
||||
}
|
||||
|
||||
// blindly add 2 here because an empty item cannot fail in
|
||||
// the value, only if a bogus dedent has occurred
|
||||
self.buffer.diag().line_offset += 2;
|
||||
|
||||
return if (line.len == 1) .{
|
||||
.shift = shift,
|
||||
.contents = .{ .list_item = .empty },
|
||||
.raw = line,
|
||||
} else .{
|
||||
.shift = shift,
|
||||
.contents = .{ .list_item = try self.detectInlineItem(line[2..]) },
|
||||
.raw = line,
|
||||
};
|
||||
},
|
||||
else => break :sigil,
|
||||
}
|
||||
}
|
||||
|
||||
// somehow everything else has failed
|
||||
self.buffer.diag().line_offset = 0;
|
||||
self.buffer.diag().length = raw_line.len;
|
||||
self.buffer.diag().message = "this document contains an unknown error. Please report this.";
|
||||
return error.Impossible;
|
||||
for (line, 0..) |char, idx| {
|
||||
if (char == ':') {
|
||||
if (idx > 0 and (line[idx - 1] == ' ' or line[idx - 1] == '\t')) {
|
||||
self.buffer.diag().line_offset += idx - 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line contains space before the map key-value separator character ':'";
|
||||
return error.TrailingWhitespace;
|
||||
}
|
||||
|
||||
if (idx + 1 == line.len) {
|
||||
self.buffer.diag().line_offset += idx + 1;
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .map_item = .{ .key = line[0..idx], .val = .empty } },
|
||||
.raw = line,
|
||||
};
|
||||
}
|
||||
|
||||
if (line[idx + 1] != ' ') {
|
||||
self.buffer.diag().line_offset += idx + 1;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the map key-value separator character ':'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .map_item = .{
|
||||
.key = line[0..idx],
|
||||
.val = try self.detectInlineItem(line[idx + 2 ..]),
|
||||
} },
|
||||
.raw = line,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return .{
|
||||
.shift = shift,
|
||||
.contents = .{ .in_line = .{ .scalar = line } },
|
||||
.raw = line,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -267,9 +288,25 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
||||
fn detectInlineItem(self: @This(), buf: []const u8) Error!InlineItem {
|
||||
if (buf.len == 0) return .empty;
|
||||
|
||||
switch (buf[0]) {
|
||||
'>', '|' => |char| {
|
||||
if (buf.len > 1 and buf[1] != ' ') return error.BadToken;
|
||||
const start = start: {
|
||||
for (buf, 0..) |chr, idx|
|
||||
if (chr == ' ')
|
||||
continue
|
||||
else if (chr == '\t')
|
||||
return error.IllegalTabWhitespaceInLine
|
||||
else
|
||||
break :start idx;
|
||||
|
||||
return error.TrailingWhitespace;
|
||||
};
|
||||
|
||||
switch (buf[start]) {
|
||||
'>', '|', '+' => |char| {
|
||||
if (buf.len - start > 1 and buf[start + 1] != ' ') {
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line is missing a space after the string start character";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
const slice: []const u8 = switch (buf[buf.len - 1]) {
|
||||
' ', '\t' => {
|
||||
@@ -278,36 +315,38 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
||||
self.buffer.diag().message = "this line contains trailing whitespace";
|
||||
return error.TrailingWhitespace;
|
||||
},
|
||||
'|' => buf[@min(2, buf.len) .. buf.len - @intFromBool(buf.len > 1)],
|
||||
else => buf[@min(2, buf.len)..buf.len],
|
||||
'|' => buf[start + @min(2, buf.len - start) .. buf.len - @intFromBool(buf.len - start > 1)],
|
||||
else => buf[start + @min(2, buf.len - start) .. buf.len],
|
||||
};
|
||||
|
||||
return if (char == '>')
|
||||
.{ .line_string = slice }
|
||||
else
|
||||
.{ .space_string = slice };
|
||||
return switch (char) {
|
||||
'>' => .{ .line_string = slice },
|
||||
'+' => .{ .space_string = slice },
|
||||
'|' => .{ .concat_string = slice },
|
||||
else => unreachable,
|
||||
};
|
||||
},
|
||||
'[' => {
|
||||
if (buf.len < 2 or buf[buf.len - 1] != ']') {
|
||||
if (buf.len - start < 2 or buf[buf.len - 1] != ']') {
|
||||
self.buffer.diag().line_offset = 0;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line contains a flow-style list but does not end with the closing character ']'";
|
||||
self.buffer.diag().message = "this line contains a inline list but does not end with the closing character ']'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
// keep the closing ] for the flow parser
|
||||
return .{ .flow_list = buf[1..] };
|
||||
// keep the closing ] for the inline parser
|
||||
return .{ .inline_list = buf[start + 1 ..] };
|
||||
},
|
||||
'{' => {
|
||||
if (buf.len < 2 or buf[buf.len - 1] != '}') {
|
||||
if (buf.len - start < 2 or buf[buf.len - 1] != '}') {
|
||||
self.buffer.diag().line_offset = 0;
|
||||
self.buffer.diag().length = 1;
|
||||
self.buffer.diag().message = "this line contains a flow-style map but does not end with the closing character '}'";
|
||||
self.buffer.diag().message = "this line contains a inline map but does not end with the closing character '}'";
|
||||
return error.BadToken;
|
||||
}
|
||||
|
||||
// keep the closing } fpr the flow parser
|
||||
return .{ .flow_map = buf[1..] };
|
||||
// keep the closing } for the inline parser
|
||||
return .{ .inline_map = buf[start + 1 ..] };
|
||||
},
|
||||
else => {
|
||||
if (buf[buf.len - 1] == ' ' or buf[buf.len - 1] == '\t') {
|
||||
@@ -317,7 +356,7 @@ pub fn LineTokenizer(comptime Buffer: type) type {
|
||||
return error.TrailingWhitespace;
|
||||
}
|
||||
|
||||
return .{ .scalar = buf };
|
||||
return .{ .scalar = buf[start..] };
|
||||
},
|
||||
}
|
||||
}
|
||||
|
5
tests/main.zig
Normal file
5
tests/main.zig
Normal file
@@ -0,0 +1,5 @@
|
||||
// Test root: pull in every test file so `zig build test` discovers them.
comptime {
    const builtin = @import("builtin");
    if (builtin.is_test) {
        _ = @import("./reify.zig");
    }
}
|
144
tests/reify.zig
Normal file
144
tests/reify.zig
Normal file
@@ -0,0 +1,144 @@
|
||||
const std = @import("std");
|
||||
|
||||
const nice = @import("nice");
|
||||
|
||||
/// Parse `scalar` as a nice document and check that it reifies to `expected`,
/// using the default parser options.
fn reifyScalar(comptime scalar: []const u8, expected: anytype) !void {
    const default_options: nice.parser.Options = .{};
    try reifyScalarWithOptions(scalar, expected, default_options);
}
|
||||
|
||||
/// Parse `scalar` (a trailing newline is appended to form a complete document)
/// into `@TypeOf(expected)` with the given parser options, and assert that the
/// reified value equals `expected`. Uses the leak-checking testing allocator.
fn reifyScalarWithOptions(comptime scalar: []const u8, expected: anytype, options: nice.parser.Options) !void {
    const Target = @TypeOf(expected);
    var diag = nice.Diagnostics{};
    const result = try nice.parseBufferTo(Target, std.testing.allocator, scalar ++ "\n", &diag, options);
    defer result.deinit();

    try std.testing.expectEqual(expected, result.value);
}
|
||||
|
||||
test "reify integer" {
    // Leading zeroes and digit-group underscores are accepted.
    inline for (.{ "123", "0123", "1_23" }) |repr| {
        try reifyScalar(repr, @as(u8, 123));
    }
    try reifyScalar("-01_23", @as(i8, -123));
}

test "reify hexadecimal" {
    inline for (.{ "0x123", "0x0123", "0x01_23" }) |repr| {
        try reifyScalar(repr, @as(i64, 0x123));
    }
    try reifyScalar("-0x01_23", @as(i64, -0x123));
}

test "reify octal" {
    inline for (.{ "0o123", "0o0123", "0o01_23" }) |repr| {
        try reifyScalar(repr, @as(i64, 0o123));
    }
    try reifyScalar("-0o01_23", @as(i64, -0o123));
}

test "reify binary" {
    inline for (.{ "0b1011", "0b01011", "0b010_11" }) |repr| {
        try reifyScalar(repr, @as(i5, 0b1011));
    }
    try reifyScalar("-0b010_11", @as(i5, -0b1011));
}

test "reify float" {
    inline for (.{ "0.25", "0.2_5", "00.250" }) |repr| {
        try reifyScalar(repr, @as(f32, 0.25));
    }
    try reifyScalar("-0.25", @as(f32, -0.25));
}

test "reify hexfloat" {
    try reifyScalar("0x0.25", @as(f64, 0x0.25));
    try reifyScalar("0x0.2_5", @as(f64, 0x0.25));
    try reifyScalar("0x0.250p1", @as(f64, 0x0.25p1));
    try reifyScalar("-0x0.25", @as(f64, -0x0.25));
}
|
||||
|
||||
test "reify true" {
    // Every scalar in the default truthy set reifies to `true`.
    inline for (.{ "true", "True", "yes", "on" }) |word| {
        try reifyScalar(word, true);
    }
}

test "reify false" {
    // Every scalar in the default falsy set reifies to `false`.
    inline for (.{ "false", "False", "no", "off" }) |word| {
        try reifyScalar(word, false);
    }
}

test "reify custom true" {
    // A user-supplied truthy set replaces the default one.
    try reifyScalarWithOptions("correct", true, .{ .truthy_boolean_scalars = &.{"correct"} });
}

test "reify true case insensitive" {
    // Default matching is case-sensitive, so "TRUE" is rejected...
    try std.testing.expectError(error.BadValue, reifyScalar("TRUE", true));
    // ...but accepted once case-insensitive coersion is enabled.
    try reifyScalarWithOptions("TRUE", true, .{ .case_insensitive_scalar_coersion = true });
}

test "reify custom false" {
    // A user-supplied falsy set replaces the default one.
    try reifyScalarWithOptions("incorrect", false, .{ .falsy_boolean_scalars = &.{"incorrect"} });
}

test "reify false case insensitive" {
    try std.testing.expectError(error.BadValue, reifyScalar("FALSE", false));
    try reifyScalarWithOptions("FALSE", false, .{ .case_insensitive_scalar_coersion = true });
}
|
||||
|
||||
test "reify null" {
    // Every scalar in the default null set reifies to a null optional.
    inline for (.{ "null", "nil", "None" }) |word| {
        try reifyScalar(word, @as(?u8, null));
    }
}

test "reify custom null" {
    // A user-supplied null set replaces the default one.
    try reifyScalarWithOptions("nothing", @as(?u8, null), .{ .null_scalars = &.{"nothing"} });
}

test "reify null case insensitive" {
    // This is a little weird because when the null string mismatches, it will
    // try to parse the child optional type and produce either a value or an
    // error from that, so the error received depends on whether or not the
    // optional child type fails to parse the given value.
    try std.testing.expectError(error.InvalidCharacter, reifyScalar("NULL", @as(?u8, null)));
    try reifyScalarWithOptions("NULL", @as(?u8, null), .{ .case_insensitive_scalar_coersion = true });
}
|
||||
|
||||
test "reify void" {
    // A void scalar cannot exist on its own as it is not distinguishable from
    // an empty document, so it is exercised as a struct field value here.
    const Holder = struct { void: void };
    try reifyScalar("void:", Holder{ .void = {} });
}

test "reify void scalar" {
    // With the default object forced to scalar, an empty document reifies to void.
    try reifyScalarWithOptions("", {}, .{ .default_object = .scalar });
}

test "reify enum" {
    // Enum scalars use the leading-dot literal form by default.
    const Pick = enum { one, two };
    try reifyScalar(".one", Pick.one);
}

test "reify enum no dot" {
    // The leading dot can be made optional via options.
    const Pick = enum { one, two };
    try reifyScalarWithOptions("two", Pick.two, .{ .expect_enum_dot = false });
}
|
Reference in New Issue
Block a user