reorganize and add parsing diagnostics

This commit is contained in:
torque 2024-03-06 20:54:31 -08:00
parent 1ecfb41d55
commit cdf2ec5a23
Signed by: torque
SSH Key Fingerprint: SHA256:nCrXefBNo6EbjNSQhv0nXmEg/VuNq3sMF5b8zETw3Tk
4 changed files with 681 additions and 553 deletions

View File

@ -9,7 +9,7 @@ pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
const yaml_zig = b.addModule("libyaml", .{
.source_file = .{ .path = "src/libyaml.zig" },
.source_file = .{ .path = "src/yaml.zig" },
});
// yaml_zig.addIncludePath(.{ .path = b.getInstallPath(.header, "") });
// _ = yaml_zig;

View File

@ -1,206 +1,28 @@
const std = @import("std");
pub const Scalar = []const u8;
pub const List = []Value;
pub const Map = std.StringArrayHashMapUnmanaged(Value);
/// Builds a wrapper type that couples a value of type `T` with the
/// heap-allocated arena that is torn down together with it.
pub fn Owned(comptime T: type) type {
    return struct {
        /// The wrapped value.
        root: T,
        /// Pointer to the arena released by `deinit`. The arena struct itself
        /// was allocated from the arena's `child_allocator`.
        allocator: *std.heap.ArenaAllocator,

        /// Frees everything held by the arena, then destroys the arena struct
        /// through the arena's own child allocator.
        pub fn deinit(self: @This()) void {
            const arena = self.allocator;
            // Grab the backing allocator before tearing the arena down; it is
            // needed afterwards to free the arena struct itself.
            const backing = arena.child_allocator;
            arena.deinit();
            backing.destroy(arena);
        }
    };
}
/// A parsed YAML node: a scalar string, a list of values, or a string-keyed map.
pub const Value = union(enum) {
    scalar: Scalar,
    list: List,
    map: Map,

    /// Parses `data` with libyaml and builds a `Value` tree from the events.
    ///
    /// The tree is allocated from an arena created on top of `allocator`; the
    /// returned `Owned(Value)` carries that arena and releases it in `deinit`.
    ///
    /// Returns `error.Failed` when libyaml reports a parse error (the details
    /// are printed with `std.debug.print`), on an empty event, on an alias, on
    /// a second document end, or when a list/map is used as a map key. Errors
    /// from `libyaml.Parser.init` and allocation failures propagate unchanged.
    pub fn fromString(allocator: std.mem.Allocator, data: []const u8) !Owned(Value) {
        var parser = try libyaml.Parser.init();
        defer parser.deinit();
        parser.setInputString(data);

        var builder = try Builder.init(allocator);
        // Only on failure: on success the arena is handed to the caller.
        errdefer builder.deinit();

        var docseen = false;
        while (true) {
            var event: libyaml.Event = undefined;
            parser.parse(&event) catch {
                // `problem` is optional; fall back to a placeholder instead of
                // unwrapping a null and panicking while reporting an error.
                const problem: []const u8 = if (parser.problem) |text|
                    std.mem.span(text)
                else
                    "(no description)";
                std.debug.print(
                    "parser failed: {s}, {s}, line {d}, col: {d}\n",
                    .{ @tagName(parser.@"error"), problem, parser.problem_mark.line, parser.problem_mark.column },
                );
                return error.Failed;
            };
            defer event.deinit();

            std.debug.print("event: {s}\n", .{@tagName(event.type)});
            switch (event.type) {
                .empty => return error.Failed,
                .stream_start => {},
                .stream_end => break,
                .document_start => {},
                // Only a single document per input is accepted.
                .document_end => docseen = if (docseen) return error.Failed else true,
                .alias => return error.Failed,
                .scalar => try builder.pushScalar(event.data.scalar.value[0..event.data.scalar.length]),
                .sequence_start => try builder.startList(),
                .sequence_end => try builder.endList(),
                .mapping_start => try builder.startMap(),
                .mapping_end => try builder.endMap(),
            }
        }
        return builder.disown();
    }

    /// Incrementally assembles a `Value` tree from scalar and container
    /// start/end notifications. All memory comes from a heap-allocated arena.
    pub const Builder = struct {
        /// One entry per open container; `init` seeds the stack with `.root`.
        pub const Stack = union(enum) {
            root,
            list: std.ArrayListUnmanaged(Value),
            map: struct {
                /// Key that is waiting for its value; null when the next
                /// scalar will be treated as a key.
                lastkey: ?Scalar = null,
                map: Map,
            },
        };

        /// Allocator interface of the arena. `deinit` and `disown` recover the
        /// arena pointer from `allocator.ptr`.
        allocator: std.mem.Allocator,
        container_stack: std.ArrayListUnmanaged(Stack),
        root: Value,

        /// Creates the arena on `child_allocator` and seeds the container
        /// stack with the `.root` entry.
        pub fn init(child_allocator: std.mem.Allocator) std.mem.Allocator.Error!Builder {
            const arena = try child_allocator.create(std.heap.ArenaAllocator);
            // Without these, a failed stack allocation below would leak the
            // arena struct (errdefers run in reverse order).
            errdefer child_allocator.destroy(arena);
            arena.* = std.heap.ArenaAllocator.init(child_allocator);
            errdefer arena.deinit();

            const allocator = arena.allocator();

            var stack = try std.ArrayListUnmanaged(Stack).initCapacity(allocator, 1);
            stack.appendAssumeCapacity(.root);

            return .{
                .allocator = allocator,
                .container_stack = stack,
                .root = .{ .scalar = "" },
            };
        }

        // this should only be run on failure.
        /// Releases the arena and the arena struct itself.
        pub fn deinit(self: Builder) void {
            const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(self.allocator.ptr));
            const alloc = arena.child_allocator;
            arena.deinit();
            alloc.destroy(arena);
        }

        /// Hands the built root value and the arena over to the caller.
        pub fn disown(self: *Builder) Owned(Value) {
            return .{
                .root = self.root,
                .allocator = @ptrCast(@alignCast(self.allocator.ptr)),
            };
        }

        /// Stores a copy of `value` in the innermost open container. Inside a
        /// map, scalars alternate between key and value.
        fn pushScalar(self: *Builder, value: Scalar) !void {
            // Every path starts by copying the scalar into the arena.
            const copy = try self.allocator.dupe(u8, value);
            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{ .scalar = copy },
                .list => |*builder| try builder.append(self.allocator, .{ .scalar = copy }),
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        try builder.map.put(self.allocator, key, .{ .scalar = copy });
                        builder.lastkey = null;
                    } else {
                        // Reserve the key now; its value is filled in by the
                        // next scalar or container end.
                        try builder.map.put(self.allocator, copy, undefined);
                        builder.lastkey = copy;
                    }
                },
            }
        }

        /// Opens a new list container.
        fn startList(self: *Builder) !void {
            try self.container_stack.append(self.allocator, .{ .list = .{} });
        }

        /// Closes the innermost list and attaches it to its parent container.
        /// Returns `error.Failed` if the parent is a map expecting a key.
        fn endList(self: *Builder) !void {
            var top = self.container_stack.pop();
            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{ .list = try top.list.toOwnedSlice(self.allocator) },
                .list => |*builder| try builder.append(
                    self.allocator,
                    .{ .list = try top.list.toOwnedSlice(self.allocator) },
                ),
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        try builder.map.put(self.allocator, key, .{ .list = try top.list.toOwnedSlice(self.allocator) });
                        builder.lastkey = null;
                    } else return error.Failed;
                },
            }
        }

        /// Opens a new map container.
        fn startMap(self: *Builder) !void {
            try self.container_stack.append(self.allocator, .{ .map = .{ .map = .{} } });
        }

        /// Closes the innermost map and attaches it to its parent container.
        /// Returns `error.Failed` if the parent is a map expecting a key.
        fn endMap(self: *Builder) !void {
            var top = self.container_stack.pop();
            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{ .map = top.map.map },
                .list => |*builder| try builder.append(
                    self.allocator,
                    .{ .map = top.map.map },
                ),
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        try builder.map.put(self.allocator, key, .{ .map = top.map.map });
                        builder.lastkey = null;
                    } else return error.Failed;
                },
            }
        }
    };
};
pub const libyaml = struct {
pub const Encoding = enum(c_int) {
pub const Encoding = enum(c_int) {
any,
utf8,
utf16le,
utf16be,
};
};
pub const VersionDirective = extern struct {
pub const VersionDirective = extern struct {
major: c_int,
minor: c_int,
};
};
pub const TagDirective = extern struct {
pub const TagDirective = extern struct {
handle: ?[*:0]u8,
prefix: ?[*:0]u8,
};
};
pub const LineBreak = enum(c_int) {
pub const LineBreak = enum(c_int) {
any,
cr,
lf,
crlf,
};
};
pub const ErrorType = enum(c_int) {
pub const ErrorType = enum(c_int) {
okay,
alloc_error,
read_error,
@ -209,36 +31,36 @@ pub const libyaml = struct {
composer_error,
writer_error,
emitter_error,
};
};
pub const Mark = extern struct {
pub const Mark = extern struct {
index: usize,
line: usize,
column: usize,
};
};
pub const ScalarStyle = enum(c_int) {
pub const ScalarStyle = enum(c_int) {
any,
plain,
single_quoted,
double_quoted,
literal,
folded,
};
};
pub const SequenceStyle = enum(c_int) {
pub const SequenceStyle = enum(c_int) {
any,
block,
flow,
};
};
pub const MappingStyle = enum(c_int) {
pub const MappingStyle = enum(c_int) {
any,
block,
flow,
};
};
pub const TokenType = enum(c_int) {
pub const TokenType = enum(c_int) {
none,
stream_start,
stream_end,
@ -261,9 +83,9 @@ pub const libyaml = struct {
anchor,
tag,
scalar,
};
};
pub const Token = extern struct {
pub const Token = extern struct {
type: TokenType,
data: extern union {
stream_start: extern struct {
@ -289,9 +111,9 @@ pub const libyaml = struct {
},
start_mark: Mark,
end_mark: Mark,
};
};
pub const EventType = enum(c_int) {
pub const EventType = enum(c_int) {
empty,
stream_start,
stream_end,
@ -303,9 +125,9 @@ pub const libyaml = struct {
sequence_end,
mapping_start,
mapping_end,
};
};
pub const Event = extern struct {
pub const Event = extern struct {
type: EventType,
data: extern union {
stream_start: extern struct {
@ -351,30 +173,30 @@ pub const libyaml = struct {
}
pub extern fn yaml_event_delete(event: *Event) void;
};
};
pub const SimpleKey = extern struct {
pub const SimpleKey = extern struct {
possible: c_int,
required: c_int,
token_number: usize,
mark: Mark,
};
};
pub const NodeType = enum(c_int) {
pub const NodeType = enum(c_int) {
none,
scalar,
sequence,
mapping,
};
};
pub const NodeItem = c_int;
pub const NodeItem = c_int;
pub const NodePair = extern struct {
pub const NodePair = extern struct {
key: c_int,
value: c_int,
};
};
pub const Node = extern struct {
pub const Node = extern struct {
type: NodeType,
tag: ?[*:0]u8,
data: extern union {
@ -402,9 +224,9 @@ pub const libyaml = struct {
},
start_mark: Mark,
end_mark: Mark,
};
};
pub const Document = extern struct {
pub const Document = extern struct {
nodes: extern struct {
start: ?*Node,
end: ?*Node,
@ -419,17 +241,17 @@ pub const libyaml = struct {
end_implicit: c_int,
start_mark: Mark,
end_mark: Mark,
};
};
pub const AliasData = extern struct {
pub const AliasData = extern struct {
anchor: ?[*]u8,
index: c_int,
mark: Mark,
};
};
pub const ReadHandler = *const fn (ctx: ?*anyopaque, buffer: [*]u8, buffer_size: usize, bytes_read: *usize) callconv(.C) c_int;
pub const ReadHandler = *const fn (ctx: ?*anyopaque, buffer: [*]u8, buffer_size: usize, bytes_read: *usize) callconv(.C) c_int;
pub const ParserState = enum(c_int) {
pub const ParserState = enum(c_int) {
stream_start,
implicit_document_start,
document_start,
@ -454,9 +276,9 @@ pub const libyaml = struct {
flow_mapping_value,
flow_mapping_empty_value,
end,
};
};
pub const Parser = extern struct {
pub const Parser = extern struct {
@"error": ErrorType,
problem: ?[*:0]const u8,
problem_offset: usize,
@ -563,5 +385,4 @@ pub const libyaml = struct {
pub extern fn yaml_parser_scan(parser: *Parser, token: *Token) c_int;
pub extern fn yaml_parser_parse(parser: *Parser, event: *Event) c_int;
pub extern fn yaml_parser_load(parser: *Parser, document: *Document) c_int;
};
};

View File

@ -14,7 +14,14 @@ pub fn main() !void {
);
defer allocator.free(slurp);
const doc = try yaml.Value.fromString(allocator, slurp);
var diag = yaml.ParseDiagnostic{ .message = "?????" };
const doc = yaml.Document.fromString(allocator, slurp, &diag) catch |err| {
std.debug.print(
"Failed to parse line: {d}, col: {d}: {s}\n",
.{ diag.line, diag.col, diag.message },
);
return err;
};
defer doc.deinit();
std.debug.print("\n-----\n\n", .{});

300
src/yaml.zig Normal file
View File

@ -0,0 +1,300 @@
const std = @import("std");
pub const libyaml = @import("./libyaml.zig");
pub const Scalar = []const u8;
pub const List = []Value;
pub const Map = std.StringArrayHashMapUnmanaged(Value);
/// Out-of-band description of a parse failure: a message plus a position.
pub const ParseDiagnostic = struct {
    /// Description of the failure. The slice is stored as given, not copied.
    message: []const u8,
    /// Set to `mark.line + 1` when a mark is recorded; 0 until then.
    line: usize = 0,
    /// Set to `mark.column` unchanged when a mark is recorded; 0 until then.
    col: usize = 0,

    /// Records both the position and the message in one call.
    pub fn set(self: *ParseDiagnostic, mark: libyaml.Mark, message: []const u8) void {
        self.setMark(mark);
        self.setMessage(message);
    }

    /// Records only the position, leaving the current message untouched.
    pub fn setMark(self: *ParseDiagnostic, mark: libyaml.Mark) void {
        self.col = mark.column;
        self.line = mark.line + 1;
    }

    /// Records only the message, leaving the current position untouched.
    pub fn setMessage(self: *ParseDiagnostic, message: []const u8) void {
        self.message = message;
    }
};
/// A parsed YAML document: the root value plus the arena that owns its memory.
pub const Document = struct {
    root: Value,
    /// Heap-allocated arena released by `deinit`.
    allocator: *std.heap.ArenaAllocator,

    /// Records `mark` and `message` in `diag` and yields `error.Failed`.
    fn failWith(diag: *ParseDiagnostic, mark: libyaml.Mark, message: []const u8) error{Failed} {
        diag.set(mark, message);
        return error.Failed;
    }

    /// Records only `mark` in `diag` (the message was set by the callee) and
    /// yields `error.Failed`.
    fn failAt(diag: *ParseDiagnostic, mark: libyaml.Mark) error{Failed} {
        diag.setMark(mark);
        return error.Failed;
    }

    /// Parses `data` with libyaml and builds a `Document` from the events.
    ///
    /// Every failure is reported as `error.Failed`, with `diag` updated first:
    /// setup failures set only the message, parse and event failures set the
    /// position as well. Aliases and a second document in the stream are
    /// rejected. On success the caller releases the result with `deinit`.
    pub fn fromString(allocator: std.mem.Allocator, data: []const u8, diag: *ParseDiagnostic) !Document {
        var yaml_parser = libyaml.Parser.init() catch {
            diag.setMessage("could not initialize libyaml parser");
            return error.Failed;
        };
        defer yaml_parser.deinit();
        yaml_parser.setInputString(data);

        var tree = Value.Builder.init(allocator) catch {
            diag.setMessage("could not initialize value builder: out of memory");
            return error.Failed;
        };
        // Only on failure: on success the arena travels with the Document.
        errdefer tree.deinit();

        var seen_document = false;
        while (true) {
            var event: libyaml.Event = undefined;
            yaml_parser.parse(&event) catch {
                const description: []const u8 = if (yaml_parser.problem) |problem|
                    std.mem.span(problem)
                else
                    "parsing failed without a description";
                return failWith(diag, yaml_parser.problem_mark, description);
            };
            defer event.deinit();

            const here = event.start_mark;
            switch (event.type) {
                .stream_start, .document_end => {},
                .stream_end => break,
                .empty => return failWith(diag, here, "an empty event was generated (???)"),
                .alias => return failWith(diag, here, "an alias node was encountered (these are not supported)"),
                .document_start => {
                    if (seen_document)
                        return failWith(diag, here, "A second YAML document was found");
                    seen_document = true;
                },
                // The builder sets the message itself; only the position is
                // added here.
                .scalar => tree.pushScalar(
                    event.data.scalar.value[0..event.data.scalar.length],
                    diag,
                ) catch return failAt(diag, here),
                .sequence_start => tree.startList(diag) catch return failAt(diag, here),
                .sequence_end => tree.endList(diag) catch return failAt(diag, here),
                .mapping_start => tree.startMap(diag) catch return failAt(diag, here),
                .mapping_end => tree.endMap(diag) catch return failAt(diag, here),
            }
        }

        return tree.document() catch {
            diag.setMessage("The value builder container stack is not empty, somehow?");
            return error.Failed;
        };
    }

    /// Frees the arena's memory and then the arena struct itself, using the
    /// arena's child allocator.
    pub fn deinit(self: Document) void {
        const arena = self.allocator;
        const backing = arena.child_allocator;
        arena.deinit();
        backing.destroy(arena);
    }
};
/// A parsed YAML node: a scalar string, a list of values, or a string-keyed map.
pub const Value = union(enum) {
    scalar: Scalar,
    list: List,
    map: Map,

    /// Incrementally assembles a `Value` tree from scalar and container
    /// start/end notifications. All memory comes from a heap-allocated arena
    /// that is either handed to the `Document` returned by `document` or
    /// released by `deinit`.
    pub const Builder = struct {
        /// One entry per open container; `init` seeds the stack with `.root`.
        pub const Stack = union(enum) {
            root,
            list: std.ArrayListUnmanaged(Value),
            map: struct {
                /// Key that is waiting for its value; null when the next
                /// scalar will be treated as a key.
                lastkey: ?Scalar = null,
                map: Map,
            },
        };

        /// Allocator interface of the arena. `deinit` and `document` recover
        /// the arena pointer from `allocator.ptr`.
        allocator: std.mem.Allocator,
        container_stack: std.ArrayListUnmanaged(Stack),
        root: Value,

        /// Creates the arena on `child_allocator` and seeds the container
        /// stack with the `.root` entry.
        pub fn init(child_allocator: std.mem.Allocator) std.mem.Allocator.Error!Builder {
            const arena = try child_allocator.create(std.heap.ArenaAllocator);
            // Without these, a failed stack allocation below would leak the
            // arena struct (errdefers run in reverse order).
            errdefer child_allocator.destroy(arena);
            arena.* = std.heap.ArenaAllocator.init(child_allocator);
            errdefer arena.deinit();

            const allocator = arena.allocator();

            var stack = try std.ArrayListUnmanaged(Stack).initCapacity(allocator, 1);
            stack.appendAssumeCapacity(.root);

            return .{
                .allocator = allocator,
                .container_stack = stack,
                .root = .{ .scalar = "" },
            };
        }

        // this should only be run on failure.
        /// Releases the arena and the arena struct itself.
        pub fn deinit(self: Builder) void {
            const arena: *std.heap.ArenaAllocator = @ptrCast(@alignCast(self.allocator.ptr));
            const alloc = arena.child_allocator;
            arena.deinit();
            alloc.destroy(arena);
        }

        /// Hands the built root value and the arena over as a `Document`.
        /// Returns `error.Failed` if the top of the container stack is not
        /// the `.root` entry.
        pub fn document(self: *Builder) !Document {
            if (self.container_stack.getLast() != .root)
                return error.Failed;

            return .{
                .root = self.root,
                .allocator = @ptrCast(@alignCast(self.allocator.ptr)),
            };
        }

        /// Stores a copy of `value` in the innermost open container. Inside a
        /// map, scalars alternate between key and value. On failure a message
        /// is written to `diag` and `error.Failed` is returned.
        fn pushScalar(self: *Builder, value: Scalar, diag: *ParseDiagnostic) !void {
            // Every path starts by copying the scalar into the arena, so do it
            // once up front. This also gives the `.root` case the same
            // diagnostic and error as the other containers.
            const copy = self.allocator.dupe(u8, value) catch {
                diag.setMessage("could not duplicate scalar (out of memory)");
                return error.Failed;
            };

            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{ .scalar = copy },
                .list => |*builder| builder.append(self.allocator, .{ .scalar = copy }) catch {
                    diag.setMessage("could not append scalar to list (out of memory)");
                    return error.Failed;
                },
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        builder.map.put(self.allocator, key, .{ .scalar = copy }) catch {
                            diag.setMessage("could not set map value (out of memory)");
                            return error.Failed;
                        };
                        builder.lastkey = null;
                    } else {
                        // Reserve the key now; its value is filled in by the
                        // next scalar or container end.
                        builder.map.put(self.allocator, copy, undefined) catch {
                            diag.setMessage("could not set map key (out of memory)");
                            return error.Failed;
                        };
                        builder.lastkey = copy;
                    }
                },
            }
        }

        /// Opens a new list container.
        fn startList(self: *Builder, diag: *ParseDiagnostic) !void {
            self.container_stack.append(self.allocator, .{ .list = .{} }) catch {
                diag.setMessage("could not add list to stack: out of memory");
                return error.Failed;
            };
        }

        /// Closes the innermost list and attaches it to its parent container.
        /// Fails if the innermost container is not a list, or if the parent is
        /// a map that is expecting a key.
        fn endList(self: *Builder, diag: *ParseDiagnostic) !void {
            var top = self.container_stack.pop();
            if (top != .list) {
                diag.setMessage("list ended when a list was not the top container");
                return error.Failed;
            }

            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{
                    .list = top.list.toOwnedSlice(self.allocator) catch {
                        diag.setMessage("could not take ownership of list");
                        return error.Failed;
                    },
                },
                .list => |*builder| builder.append(self.allocator, .{
                    .list = top.list.toOwnedSlice(self.allocator) catch {
                        diag.setMessage("could not take ownership of list");
                        return error.Failed;
                    },
                }) catch {
                    diag.setMessage("could not append list to list");
                    return error.Failed;
                },
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        builder.map.put(self.allocator, key, .{
                            .list = top.list.toOwnedSlice(self.allocator) catch {
                                diag.setMessage("could not take ownership of list");
                                return error.Failed;
                            },
                        }) catch {
                            diag.setMessage("could not put list in map");
                            return error.Failed;
                        };
                        builder.lastkey = null;
                    } else {
                        diag.setMessage("found a list masquerading as a map key (only scalar keys are supported)");
                        return error.Failed;
                    }
                },
            }
        }

        /// Opens a new map container.
        fn startMap(self: *Builder, diag: *ParseDiagnostic) !void {
            self.container_stack.append(self.allocator, .{ .map = .{ .map = .{} } }) catch {
                diag.setMessage("could not add map to stack: out of memory");
                return error.Failed;
            };
        }

        /// Closes the innermost map and attaches it to its parent container.
        /// Fails if the innermost container is not a map, or if the parent is
        /// a map that is expecting a key.
        fn endMap(self: *Builder, diag: *ParseDiagnostic) !void {
            var top = self.container_stack.pop();
            if (top != .map) {
                diag.setMessage("map ended when a map was not the top container");
                return error.Failed;
            }

            switch (self.container_stack.items[self.container_stack.items.len - 1]) {
                .root => self.root = .{ .map = top.map.map },
                .list => |*builder| builder.append(
                    self.allocator,
                    .{ .map = top.map.map },
                ) catch {
                    diag.setMessage("could not append map to list");
                    return error.Failed;
                },
                .map => |*builder| {
                    if (builder.lastkey) |key| {
                        builder.map.put(self.allocator, key, .{ .map = top.map.map }) catch {
                            diag.setMessage("could not put map in map");
                            return error.Failed;
                        };
                        builder.lastkey = null;
                    } else {
                        diag.setMessage("found a map masquerading as a map key (only scalar keys are supported)");
                        return error.Failed;
                    }
                },
            }
        }
    };
};