From 95aa6d01c613d756c1a5c1aeb66ffc86fe285305 Mon Sep 17 00:00:00 2001
From: torque
Date: Thu, 11 May 2023 10:13:13 -0700
Subject: [PATCH] tokenator: contextualize struct fields

this also decouples the CLI from the tokenization functions so they can
be called from other programs.
---
 documentation/tokenator.zig | 104 ++++++++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 27 deletions(-)

diff --git a/documentation/tokenator.zig b/documentation/tokenator.zig
index 954b4a3..ed40342 100644
--- a/documentation/tokenator.zig
+++ b/documentation/tokenator.zig
@@ -83,13 +83,8 @@ fn write_whitespace(out: anytype, input: []const u8) !void {
 }
 
 // TODO: use more context to get better token resolution
-// identifier preceded by (break | continue) colon is a label
-// identifier followed by colon (inline | for | while | l_brace) is a label
 //
 // identifier preceded by dot, not preceded by name, and followed by (, | => | == | != | rbrace | rparen | and | or | ;) is an enum literal
-// identifier preceded by dot and followed by = is a struct field initializer
-//
-// true, false, null are not keywords but we should be able to treat them as literals. They should all be tokenized as identifiers
 //
 // identifier followed by ( is always a function call
 //
@@ -113,6 +108,8 @@ const TokenClass = enum {
     doc_comment,
     literal_primitive,
     literal_number,
+    literal_enum,
+    field_name,
     symbology,
     whitespace,
     context_free,
@@ -124,6 +121,8 @@ const TokenClass = enum {
         .doc_comment => "doc comment",
         .literal_primitive => "literal primitive",
         .literal_number => "literal number",
+        .literal_enum => "literal enum",
+        .field_name => "field-name",
         .symbology => "",
         .context_free => "",
         .whitespace => "",
@@ -217,6 +216,7 @@ pub const ContextManager = struct {
         return (contextualize_function(tokens, current) or
             contextualize_builtin_type(tokens, current) or
             contextualize_label(tokens, current) or
+            contextualize_struct_field(tokens, current) or
             contextualize_fallback(tokens, current));
     }
 
@@ -228,6 +228,11 @@ pub const ContextManager = struct {
             return true;
         }
 
+        if (current < tokens.len - 1 and tokens[current + 1].tag == .l_paren) {
+            tokens[current].class = .function;
+            return true;
+        }
+
         return false;
     }
 
@@ -290,6 +295,25 @@ pub const ContextManager = struct {
         return false;
     }
 
+    fn contextualize_struct_field(tokens: []ContextToken, current: usize) bool {
+        if (current == 0) return false;
+        if (tokens[current - 1].tag != .period) return false;
+
+        const precursor = prev_valid(tokens, current - 1) orelse return false;
+        const successor = next_valid(tokens, current) orelse return false;
+
+        if ((tokens[precursor].tag == .l_brace or
+            tokens[precursor].tag == .comma) and
+            tokens[successor].tag == .equal)
+        {
+            tokens[current - 1].class = .field_name;
+            tokens[current].class = .field_name;
+            return true;
+        }
+
+        return false;
+    }
+
     fn contextualize_fallback(tokens: []ContextToken, current: usize) bool {
         tokens[current].class = .context_free;
         return true;
@@ -618,6 +642,44 @@ fn getTokenLocation(src: []const u8, token: std.zig.Token) Location {
     return loc;
 }
 
+pub fn tokenize_buffer(
+    buffer: []const u8,
+    allocator: std.mem.Allocator,
+    writer: anytype,
+    full_html: bool,
+) !void {
+    const intermediate = try allocator.dupeZ(u8, buffer);
+    defer allocator.free(intermediate);
+
+    try write_tokenized_html(intermediate, allocator, writer, full_html);
+}
+
+pub fn tokenize_file(
+    file_name: []const u8,
+    allocator: std.mem.Allocator,
+    writer: anytype,
+    full_html: bool,
+) !void {
+    const srcbuf = blk: {
+        const file = fs.cwd().openFile(file_name, .{ .mode = .read_only }) catch |err| {
+            std.debug.print("couldn't open {s}\n", .{file_name});
+            return err;
+        };
+        defer file.close();
+
+        break :blk try file.readToEndAllocOptions(
+            allocator,
+            1_000_000,
+            null,
+            @alignOf(u8),
+            0,
+        );
+    };
+    defer allocator.free(srcbuf);
+
+    try write_tokenized_html(srcbuf, allocator, writer, full_html);
+}
+
 const html_preamble =
     \\
    \\
@@ -666,7 +728,7 @@ const html_epilogue =
 ;
 
 const tokenator = cmd: {
-    var cmd = noclip.CommandBuilder(TokCtx){
+    var cmd = noclip.CommandBuilder(*TokCtx){
         .description =
         \\Tokenize one or more zig files into HTML.
         \\
@@ -702,41 +764,29 @@ const TokCtx = struct {
     allocator: std.mem.Allocator,
 };
 
-fn tokenize_files(context: *TokCtx, parameters: tokenator.Output()) !void {
+fn tokenize_files_cli(context: *TokCtx, parameters: tokenator.Output()) !void {
     const stdout = std.io.getStdOut().writer();
 
     for (parameters.files.items) |file_name| {
-        const srcbuf = blk: {
-            const file = try fs.cwd().openFile(file_name, .{ .mode = .read_only });
-            defer file.close();
-
-            break :blk try file.readToEndAllocOptions(
-                context.allocator,
-                1_000_000,
-                null,
-                @alignOf(u8),
-                0,
-            );
-        };
-        defer context.allocator.free(srcbuf);
-
         if (parameters.write_stdout) {
-            try write_tokenized_html(srcbuf, context.allocator, stdout, parameters.full);
+            try tokenize_file(file_name, context.allocator, stdout, parameters.full);
             try stdout.writeByte(0);
         } else {
             const outname = try std.mem.join(context.allocator, ".", &[_][]const u8{ file_name, "html" });
-            print("writing: {s}\n", .{outname});
             defer context.allocator.free(outname);
-
             const output = try fs.cwd().createFile(outname, .{});
             defer output.close();
 
+            print("writing: {s}", .{outname});
+            errdefer print(" failed!\n", .{});
+
+            try tokenize_file(file_name, context.allocator, output.writer(), parameters.full);
+            print(" done\n", .{});
         }
     }
 }
 
-pub fn main() !u8 {
+pub fn cli() !u8 {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     defer _ = gpa.deinit();
     const allocator = gpa.allocator();
@@ -746,7 +796,7 @@
     var arena = std.heap.ArenaAllocator.init(gpa.allocator());
    defer arena.deinit();
 
-    var cli_parser = tokenator.create_parser(tokenize_files, arena.allocator());
+    var cli_parser = tokenator.create_parser(tokenize_files_cli, arena.allocator());
     try cli_parser.execute(&ctx);
 
     return 0;
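
Usage note (not part of the patch): with tokenize_buffer and tokenize_file
public, another program can drive the tokenizer directly instead of going
through the CLI. A minimal sketch, assuming the consumer's build exposes this
file as an importable module named "tokenator" (the module name and build
wiring are hypothetical, only the two function signatures come from the patch):

const std = @import("std");
const tokenator = @import("tokenator");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Collect the highlighted HTML in memory; any std.io-style writer works
    // because the writer parameter is anytype.
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();

    // tokenize_buffer dupes the slice into a sentinel-terminated copy
    // internally (via dupeZ), so a plain []const u8 is fine here. The final
    // bool is full_html, which the CLI feeds from its `full` parameter.
    try tokenator.tokenize_buffer("const x: u32 = 1;", allocator, out.writer(), false);

    try std.io.getStdOut().writeAll(out.items);
}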