I will never get tired of vendoring dependencies. ha ha. It is possible
I am insane. I had to do a lot of pruning to get these not to be
ridiculous (especially the unicode data, which had nearly 1 million
lines of... stuff).
This commit is contained in:
2024-08-09 17:32:06 -07:00
commit 7692cb4bc7
155 changed files with 206515 additions and 0 deletions

67
deps/zg/codegen/canon.zig vendored Normal file
View File

@@ -0,0 +1,67 @@
const std = @import("std");
const builtin = @import("builtin");
/// Code generator for the canonical decomposition table.
/// Reads `data/unicode/UnicodeData.txt`, keeps only entries whose field 5
/// holds a canonical (untagged) decomposition, and writes them to the output
/// path given as the first CLI argument as a raw-DEFLATE stream of records:
/// a u8 count (2 or 3) followed by that many u24 code points — the composed
/// code point first, then its decomposition. A u16 zero terminates the stream.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0] (program name).
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Multi-byte values use the build machine's native byte order —
    // assumes the generated file is consumed on a same-endian target;
    // TODO confirm against the reader side.
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] is the composed code point; cps[1..len] its decomposition.
        var cps: [3]u24 = undefined;
        var len: u8 = 2;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),
                5 => {
                    // Not canonical.
                    // (Empty field = no decomposition at all; a leading '<'
                    // marks a compatibility-tagged decomposition.)
                    if (field.len == 0 or field[0] == '<') continue :lines;
                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                },
                // NOTE(review): `line[0]` is the first char of the whole line
                // (the code point's hex digits), which is never '<', so this
                // branch never fires; `field[0]` may have been intended —
                // verify against upstream zg.
                2 => if (line[0] == '<') continue :lines,
                else => {},
            }
        }
        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }
    // Zero length value marks end-of-stream for the reader.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}

135
deps/zg/codegen/case_prop.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// A trie block: per-code-point case-property bitsets for one run of 256
// consecutive code points.
const block_size = 256;
const Block = [block_size]u8;
// Hash map used to deduplicate identical blocks while building the
// two-stage lookup table; the value is the block's offset into stage 2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), key: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, key, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
            return mem.eql(u8, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Code generator for the case-property two-stage lookup table.
/// Parses `data/unicode/DerivedCoreProperties.txt` into a per-code-point
/// bitset (1 = Lowercase, 2 = Uppercase, 4 = Cased), deduplicates 256-entry
/// blocks into a stage-1 index / stage-2 data pair, and writes both arrays
/// raw-DEFLATE-compressed to the output path given as the first CLI argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Sparse map: code point -> property bitset; absent means 0.
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment, then tokenize the "cp; prop" fields.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range for the current line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Props: map the property name to its bit.
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "Lowercase")) bit = 1;
                    if (mem.eql(u8, field, "Uppercase")) bit = 2;
                    if (mem.eql(u8, field, "Cased")) bit = 4;
                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Compress the flat map into a two-stage table: stage1 maps a block
    // number (cp >> 8) to an offset into stage2; identical 256-entry blocks
    // share a single stage2 entry.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = prop;
        block_len += 1;
        // Flush only on a full block (or at the very last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0].
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native byte order — assumes a same-endian consumer; TODO confirm.
    const endian = builtin.cpu.arch.endian();
    // Layout: u16 stage1 len, stage1 as u16s, u16 stage2 len, stage2 bytes.
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

124
deps/zg/codegen/ccc.zig vendored Normal file
View File

@@ -0,0 +1,124 @@
const std = @import("std");
const builtin = @import("builtin");
// One run of 256 consecutive code points' canonical combining class values.
const block_size = 256;
const Block = [block_size]u8;
// Deduplicates identical blocks while building the two-stage table;
// the value is the block's byte offset into the stage-2 array.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), key: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, key, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
            return std.mem.eql(u8, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Code generator for the canonical combining class (ccc) two-stage table.
/// Parses `data/unicode/extracted/DerivedCombiningClass.txt` into a sparse
/// code point -> ccc map (class 0 is the default and is not stored),
/// deduplicates 256-entry blocks, and writes the stage-1/stage-2 arrays
/// raw-DEFLATE-compressed to the output path given as the first CLI argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Sparse map: code point -> combining class; absent means 0.
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCombiningClass.txt
    var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{});
    defer cc_file.close();
    var cc_buf = std.io.bufferedReader(cc_file.reader());
    const cc_reader = cc_buf.reader();
    while (try cc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment, then tokenize the "cp; ccc" fields.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range for the current line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Combining Class; 0 is the default, so don't store it.
                    if (std.mem.eql(u8, field, "0")) continue;
                    const cc = try std.fmt.parseInt(u8, field, 10);
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), cc);
                },
                else => {},
            }
        }
    }
    // Compress the flat map into a two-stage table: stage1 maps a block
    // number (cp >> 8) to an offset into stage2; identical 256-entry blocks
    // share a single stage2 entry.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const cc = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = cc;
        block_len += 1;
        // Flush only on a full block (or at the very last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0].
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native byte order — assumes a same-endian consumer; TODO confirm.
    const endian = builtin.cpu.arch.endian();
    // Layout: u16 stage1 len, stage1 as u16s, u16 stage2 len, stage2 bytes.
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

64
deps/zg/codegen/compat.zig vendored Normal file
View File

@@ -0,0 +1,64 @@
const std = @import("std");
const builtin = @import("builtin");
/// Code generator for the compatibility decomposition table.
/// Reads `data/unicode/UnicodeData.txt`, keeps only entries whose field 5
/// holds a `<tagged>` compatibility decomposition, and writes them to the
/// output path given as the first CLI argument as a raw-DEFLATE stream of
/// records: a u8 count followed by that many u24 code points — the composed
/// code point first, then its decomposition (up to 18 code points).
/// A u16 zero terminates the stream.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0].
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native byte order — assumes a same-endian consumer; TODO confirm.
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] is the composed code point; cps[1..len] its decomposition.
        var cps: [19]u24 = undefined;
        var len: u8 = 1;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),
                5 => {
                    // Not compatibility.
                    // (Only `<tag>`-prefixed decompositions are kept here;
                    // canonical ones are handled by canon.zig.)
                    if (field.len == 0 or field[0] != '<') continue :lines;
                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // <compat type>
                    while (cp_iter.next()) |cp_str| : (len += 1) {
                        cps[len] = try std.fmt.parseInt(u24, cp_str, 16);
                    }
                },
                // NOTE(review): `line[0]` is the first char of the whole line
                // (the code point's hex digits), which is never '<', so this
                // branch never fires; `field[0]` may have been intended —
                // verify against upstream zg.
                2 => if (line[0] == '<') continue :lines,
                else => {},
            }
        }
        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }
    // Zero length value marks end-of-stream for the reader.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}

138
deps/zg/codegen/core_props.zig vendored Normal file
View File

@@ -0,0 +1,138 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// A trie block: core-property bitsets for one run of 256 consecutive
// code points.
const block_size = 256;
const Block = [block_size]u8;
// Hash map used to deduplicate identical blocks while building the
// two-stage lookup table; the value is the block's offset into stage 2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), key: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, key, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
            return mem.eql(u8, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Code generator for the core-property two-stage lookup table.
/// Parses `data/unicode/DerivedCoreProperties.txt` into a per-code-point
/// bitset (1 = Math, 2 = Alphabetic, 4 = ID_Start, 8 = ID_Continue,
/// 16 = XID_Start, 32 = XID_Continue), deduplicates 256-entry blocks into
/// a stage-1 index / stage-2 data pair, and writes both arrays
/// raw-DEFLATE-compressed to the output path given as the first CLI argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Sparse map: code point -> property bitset; absent means 0.
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment, then tokenize the "cp; prop" fields.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range for the current line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property: map the property name to its bit.
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "Math")) bit = 1;
                    if (mem.eql(u8, field, "Alphabetic")) bit = 2;
                    if (mem.eql(u8, field, "ID_Start")) bit = 4;
                    if (mem.eql(u8, field, "ID_Continue")) bit = 8;
                    if (mem.eql(u8, field, "XID_Start")) bit = 16;
                    if (mem.eql(u8, field, "XID_Continue")) bit = 32;
                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Compress the flat map into a two-stage table: stage1 maps a block
    // number (cp >> 8) to an offset into stage2; identical 256-entry blocks
    // share a single stage2 entry.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = prop;
        block_len += 1;
        // Flush only on a full block (or at the very last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0].
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native byte order — assumes a same-endian consumer; TODO confirm.
    const endian = builtin.cpu.arch.endian();
    // Layout: u16 stage1 len, stage1 as u16s, u16 stage2 len, stage2 bytes.
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

244
deps/zg/codegen/dwp.zig vendored Normal file
View File

@@ -0,0 +1,244 @@
const std = @import("std");
const builtin = @import("builtin");
const options = @import("options");
// One run of 256 consecutive code points' display widths, stored as i3.
const block_size = 256;
const Block = [block_size]i3;
// Deduplicates identical blocks while building the two-stage table;
// the value is the block's offset into the stage-2 array.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), key: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, key, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
            return std.mem.eql(i3, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Code generator for the display-width two-stage table.
/// Combines `DerivedEastAsianWidth.txt` and `DerivedGeneralCategory.txt`
/// with hard-coded overrides to assign each code point a cell width stored
/// as i3 (values used: -1, 0, 1, 2, 3), then deduplicates 256-entry blocks
/// and writes the stage-1/stage-2 arrays raw-DEFLATE-compressed to the
/// output path given as the first CLI argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Sparse map: code point -> width; absent means the default width 1.
    var flat_map = std.AutoHashMap(u21, i3).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedEastAsianWidth.txt
    var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{});
    defer deaw_file.close();
    var deaw_buf = std.io.bufferedReader(deaw_file.reader());
    const deaw_reader = deaw_buf.reader();
    while (try deaw_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        // @missing ranges
        // These declare default widths for unlisted code points; every
        // non-global default range here is recorded as wide (2).
        if (std.mem.startsWith(u8, line, "# @missing: ")) {
            const semi = std.mem.indexOfScalar(u8, line, ';').?;
            const field = line[12..semi];
            const dots = std.mem.indexOf(u8, field, "..").?;
            const from = try std.fmt.parseInt(u21, field[0..dots], 16);
            const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
            // Skip the global default covering the entire code space.
            if (from == 0 and to == 0x10ffff) continue;
            for (from..to + 1) |cp| try flat_map.put(@intCast(cp), 2);
            continue;
        }
        if (line[0] == '#') continue;
        // Strip any trailing comment, then tokenize the "cp; width" fields.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range for the current line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Width: Wide (W) and Fullwidth (F) occupy 2 cells;
                    // Ambiguous (A) counts as 2 only with the cjk option.
                    if (std.mem.eql(u8, field, "W") or
                        std.mem.eql(u8, field, "F") or
                        (options.cjk and std.mem.eql(u8, field, "A")))
                    {
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 2);
                    }
                },
                else => {},
            }
        }
    }
    // Process DerivedGeneralCategory.txt
    // Marks (Mn/Me/Mc) and non-Arabic format controls (Cf) are zero-width.
    var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer dgc_file.close();
    var dgc_buf = std.io.bufferedReader(dgc_file.reader());
    const dgc_reader = dgc_buf.reader();
    while (try dgc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    if (std.mem.eql(u8, field, "Mn")) {
                        // Nonspacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Me")) {
                        // Enclosing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Mc")) {
                        // Spacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Cf")) {
                        if (std.mem.indexOf(u8, line, "ARABIC") == null) {
                            // Format except Arabic
                            for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Compress into a two-stage table; identical 256-entry blocks share
    // a single stage2 entry.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(i3).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]i3{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        var width = flat_map.get(cp) orelse 1;
        // Specific overrides
        switch (cp) {
            // Three-em dash
            0x2e3b => width = 3,
            // C0/C1 control codes
            0...0x20,
            0x80...0xa0,
            // Line separator
            0x2028,
            // Paragraph separator
            0x2029,
            // Hangul syllable and ignorable.
            0x1160...0x11ff,
            0xd7b0...0xd7ff,
            0x2060...0x206f,
            0xfff0...0xfff8,
            0xe0000...0xE0fff,
            => width = 0,
            // Two-em dash
            0x2e3a,
            // Regional indicators
            0x1f1e6...0x1f200,
            // CJK Blocks
            0x3400...0x4dbf, // CJK Unified Ideographs Extension A
            0x4e00...0x9fff, // CJK Unified Ideographs
            0xf900...0xfaff, // CJK Compatibility Ideographs
            0x20000...0x2fffd, // Plane 2
            0x30000...0x3fffd, // Plane 3
            => width = 2,
            else => {},
        }
        // ASCII
        if (0x20 <= cp and cp < 0x7f) width = 1;
        // Soft hyphen
        if (cp == 0xad) width = 1;
        // Backspace and delete
        if (cp == 0x8 or cp == 0x7f) width = -1;
        // Process block
        block[block_len] = width;
        block_len += 1;
        // Flush only on a full block (or at the very last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // Skip argv[0].
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native byte order — assumes a same-endian consumer; TODO confirm.
    const endian = builtin.cpu.arch.endian();
    // Layout: u16 stage1 len, stage1 as u16s, u16 stage2 len, then each
    // stage2 width as an i8 (the i3 sign-extends on coercion).
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(i8, i, endian);
    try out_comp.flush();
}

252
deps/zg/codegen/fold.zig vendored Normal file
View File

@@ -0,0 +1,252 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
/// Code generator for the case-fold data file.
/// Reads `DerivedCoreProperties.txt` (Changes_When_Casefolded) and
/// `CaseFolding.txt` (status C and F mappings), encodes the mappings as a
/// three-stage lookup table — stage3 holds one i24 offset per unique
/// single-code-point fold followed by i24 triples for multi-code-point
/// folds — plus an exception list, and writes everything
/// raw-DEFLATE-compressed to the output path given as the first CLI argument.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();
    // Process DerivedCoreProperties.txt
    // Collect the set of code points carrying Changes_When_Casefolded.
    var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer props_file.close();
    var props_buf = std.io.bufferedReader(props_file.reader());
    const props_reader = props_buf.reader();
    var props_map = std.AutoHashMap(u21, void).init(allocator);
    defer props_map.deinit();
    var line_buf: [4096]u8 = undefined;
    props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range for the current line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines;
                    for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {});
                },
                else => {},
            }
        }
    }
    // Map: code point -> up to 3 fold code points, zero-padded.
    var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
    defer codepoint_mapping.deinit();
    // Process CaseFolding.txt
    var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
    defer cp_file.close();
    var cp_buf = std.io.bufferedReader(cp_file.reader());
    const cp_reader = cp_buf.reader();
    while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        var field_it = std.mem.splitScalar(u8, line, ';');
        const codepoint_str = field_it.first();
        const codepoint = try std.fmt.parseUnsigned(u21, codepoint_str, 16);
        const status = std.mem.trim(u8, field_it.next() orelse continue, " ");
        // Only interested in 'common' and 'full'
        // (simple 'S' and Turkic 'T' mappings are deliberately dropped).
        if (status[0] != 'C' and status[0] != 'F') continue;
        const mapping = std.mem.trim(u8, field_it.next() orelse continue, " ");
        var mapping_it = std.mem.splitScalar(u8, mapping, ' ');
        var mapping_buf = [_]u21{0} ** 3;
        var mapping_i: u8 = 0;
        while (mapping_it.next()) |mapping_c| {
            mapping_buf[mapping_i] = try std.fmt.parseInt(u21, mapping_c, 16);
            mapping_i += 1;
        }
        try codepoint_mapping.putNoClobber(codepoint, mapping_buf);
    }
    var changes_when_casefolded_exceptions = std.ArrayList(u21).init(allocator);
    defer changes_when_casefolded_exceptions.deinit();
    {
        // Codepoints with a case fold mapping can be missing the Changes_When_Casefolded property,
        // but not vice versa.
        for (codepoint_mapping.keys()) |codepoint| {
            if (props_map.get(codepoint) == null) {
                try changes_when_casefolded_exceptions.append(codepoint);
            }
        }
    }
    // Single-code-point folds are encoded as signed offsets (fold - source);
    // these two maps assign every distinct offset a small index.
    var offset_to_index = std.AutoHashMap(i32, u8).init(allocator);
    defer offset_to_index.deinit();
    var unique_offsets = std.AutoArrayHashMap(i32, u32).init(allocator);
    defer unique_offsets.deinit();
    // First pass
    // Count each offset's frequency, then sort descending so the most
    // common offsets receive the smallest indices.
    {
        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len == 1) {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const result = try unique_offsets.getOrPut(offset);
                if (!result.found_existing) result.value_ptr.* = 0;
                result.value_ptr.* += 1;
            }
        }
        // A codepoint mapping to itself (offset=0) is the most common case
        try unique_offsets.put(0, 0x10FFFF);
        const C = struct {
            vals: []u32,
            pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
                return ctx.vals[a_index] > ctx.vals[b_index];
            }
        };
        unique_offsets.sort(C{ .vals = unique_offsets.values() });
        var offset_it = unique_offsets.iterator();
        var offset_index: u7 = 0;
        while (offset_it.next()) |entry| {
            try offset_to_index.put(entry.key_ptr.*, offset_index);
            offset_index += 1;
        }
    }
    var mappings_to_index = std.AutoArrayHashMap([3]u21, u8).init(allocator);
    defer mappings_to_index.deinit();
    var codepoint_to_index = std.AutoHashMap(u21, u8).init(allocator);
    defer codepoint_to_index.deinit();
    // Second pass
    // Assign each folding code point its data index: bit 7 set marks a
    // multi-code-point mapping, otherwise the index names an offset.
    {
        var count_multiple_codepoints: u8 = 0;
        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len > 1) {
                const result = try mappings_to_index.getOrPut(entry.value_ptr.*);
                if (!result.found_existing) {
                    result.value_ptr.* = 0x80 | count_multiple_codepoints;
                    count_multiple_codepoints += 1;
                }
                const index = result.value_ptr.*;
                try codepoint_to_index.put(codepoint, index);
            } else {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const index = offset_to_index.get(offset).?;
                try codepoint_to_index.put(codepoint, index);
            }
        }
    }
    // Build the stage1/stage2/stage3 arrays and output them
    {
        const Block = [256]u8;
        // Deduplicated stage-2 blocks; the map's insertion index doubles as
        // the block's stage-1 value.
        var stage2_blocks = std.AutoArrayHashMap(Block, void).init(allocator);
        defer stage2_blocks.deinit();
        // Block 0 is the all-zero (identity fold) block.
        const empty_block: Block = [_]u8{0} ** 256;
        try stage2_blocks.put(empty_block, {});
        const stage1_len = (0x10FFFF / 256) + 1;
        var stage1: [stage1_len]u8 = undefined;
        var codepoint: u21 = 0;
        var block: Block = undefined;
        while (codepoint <= 0x10FFFF) {
            const data_index = codepoint_to_index.get(codepoint) orelse 0;
            block[codepoint % 256] = data_index;
            codepoint += 1;
            if (codepoint % 256 == 0) {
                const result = try stage2_blocks.getOrPut(block);
                const index = result.index;
                stage1[(codepoint >> 8) - 1] = @intCast(index);
            }
        }
        // Trailing all-zero (identity) blocks carry no information; cut
        // stage1 after the last non-zero entry.
        const last_meaningful_block = std.mem.lastIndexOfNone(u8, &stage1, "\x00").?;
        const meaningful_stage1 = stage1[0 .. last_meaningful_block + 1];
        const codepoint_cutoff = (last_meaningful_block + 1) << 8;
        // Indices >= this value refer to multi-code-point triples.
        const multiple_codepoint_start: usize = unique_offsets.count();
        var index: usize = 0;
        // stage3: one i24 per unique offset, then three i24 code points per
        // multi-code-point mapping.
        const stage3_elems = unique_offsets.count() + mappings_to_index.count() * 3;
        var stage3 = try allocator.alloc(i24, stage3_elems);
        defer allocator.free(stage3);
        for (unique_offsets.keys()) |key| {
            stage3[index] = @intCast(key);
            index += 1;
        }
        for (mappings_to_index.keys()) |key| {
            stage3[index] = @intCast(key[0]);
            stage3[index + 1] = @intCast(key[1]);
            stage3[index + 2] = @intCast(key[2]);
            index += 3;
        }
        // Flatten the deduplicated blocks into one contiguous byte array.
        const stage2_elems = stage2_blocks.count() * 256;
        var stage2 = try allocator.alloc(u8, stage2_elems);
        defer allocator.free(stage2);
        for (stage2_blocks.keys(), 0..) |key, i| {
            @memcpy(stage2[i * 256 ..][0..256], &key);
        }
        // Write out compressed binary data file.
        var args_iter = try std.process.argsWithAllocator(allocator);
        defer args_iter.deinit();
        _ = args_iter.skip(); // Skip argv[0].
        const output_path = args_iter.next() orelse @panic("No output file arg!");
        const compressor = std.compress.flate.deflate.compressor;
        var out_file = try std.fs.cwd().createFile(output_path, .{});
        defer out_file.close();
        var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
        const writer = out_comp.writer();
        // Native byte order — assumes a same-endian consumer; TODO confirm.
        const endian = builtin.cpu.arch.endian();
        // Table metadata.
        try writer.writeInt(u24, @intCast(codepoint_cutoff), endian);
        try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian);
        // Stage 1
        try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian);
        try writer.writeAll(meaningful_stage1);
        // Stage 2
        try writer.writeInt(u16, @intCast(stage2.len), endian);
        try writer.writeAll(stage2);
        // Stage 3
        try writer.writeInt(u16, @intCast(stage3.len), endian);
        for (stage3) |offset| try writer.writeInt(i24, offset, endian);
        // Changes when case folded
        // Min and max
        try writer.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
        for (changes_when_casefolded_exceptions.items) |cp| try writer.writeInt(u24, cp, endian);
        try out_comp.flush();
    }
}

248
deps/zg/codegen/gbp.zig vendored Normal file
View File

@@ -0,0 +1,248 @@
const std = @import("std");
const builtin = @import("builtin");
/// Indic_Conjunct_Break (InCB) property values. Spellings must match the
/// property values in DerivedCoreProperties.txt exactly — they are resolved
/// with `std.meta.stringToEnum` during parsing. `none` is the default for
/// unlisted code points.
/// NOTE(review): the integer tag order is presumably baked into the
/// generated table — confirm with the consumer before reordering fields.
const Indic = enum {
    none,
    Consonant,
    Extend,
    Linker,
};
/// Grapheme_Cluster_Break property values. Spellings must match the values
/// in auxiliary/GraphemeBreakProperty.txt exactly — they are resolved with
/// `std.meta.stringToEnum` during parsing. `none` is the default for
/// unlisted code points.
/// NOTE(review): the integer tag order is presumably baked into the
/// generated table — confirm with the consumer before reordering fields.
const Gbp = enum {
    none,
    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};
// One run of 256 consecutive code points' u16 table values.
const block_size = 256;
const Block = [block_size]u16;
// Deduplicates identical blocks while building the two-stage table;
// the value is the block's offset into the stage-2 array.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), key: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, key, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
            return std.mem.eql(u16, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
// Per-code-point property maps built from three UCD files; all freed by the arena.
var indic_map = std.AutoHashMap(u21, Indic).init(allocator);
defer indic_map.deinit();
var gbp_map = std.AutoHashMap(u21, Gbp).init(allocator);
defer gbp_map.deinit();
var emoji_set = std.AutoHashMap(u21, void).init(allocator);
defer emoji_set.deinit();
var line_buf: [4096]u8 = undefined;
// Process Indic
// Indic_Conjunct_Break (InCB) values from DerivedCoreProperties.txt.
var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
defer indic_file.close();
var indic_buf = std.io.bufferedReader(indic_file.reader());
const indic_reader = indic_buf.reader();
while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Only InCB lines are relevant; everything else in the file is skipped early.
if (std.mem.indexOf(u8, line, "InCB") == null) continue;
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
// Fields are "cp[..cp] ; InCB ; Value"; tokenizing on "; " folds out separators.
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
2 => {
// Prop
// Field 2 is the InCB value (Linker/Consonant/Extend).
// NOTE(review): "InvalidPorp" looks like a typo of "InvalidProp";
// renaming would change the error set, so left as-is here.
const prop = std.meta.stringToEnum(Indic, field) orelse return error.InvalidPorp;
for (current_code[0]..current_code[1] + 1) |cp| try indic_map.put(@intCast(cp), prop);
},
else => {},
}
}
}
// Process GBP
// Grapheme_Cluster_Break values; same line format, but the value is field 1.
var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
defer gbp_file.close();
var gbp_buf = std.io.bufferedReader(gbp_file.reader());
const gbp_reader = gbp_buf.reader();
while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Prop
// NOTE(review): same "InvalidPorp" typo as the Indic loop above.
const prop = std.meta.stringToEnum(Gbp, field) orelse return error.InvalidPorp;
for (current_code[0]..current_code[1] + 1) |cp| try gbp_map.put(@intCast(cp), prop);
},
else => {},
}
}
}
// Process Emoji
// Only Extended_Pictographic membership is recorded (a single bit per cp).
var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
defer emoji_file.close();
var emoji_buf = std.io.bufferedReader(emoji_file.reader());
const emoji_reader = emoji_buf.reader();
while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
const from = try std.fmt.parseInt(u21, field[0..dots], 16);
const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
for (from..to + 1) |cp| try emoji_set.put(@intCast(cp), {});
} else {
const cp = try std.fmt.parseInt(u21, field, 16);
try emoji_set.put(@intCast(cp), {});
}
},
else => {},
}
}
}
// Build a three-stage compressed lookup table over the whole code space:
// stage1: one u16 per 256-cp block -> offset into stage2
// stage2: deduplicated blocks of u16 indices into stage3
// stage3: the distinct packed property bytes actually observed
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u16).init(allocator);
defer stage2.deinit();
var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
defer stage3.deinit();
var stage3_len: u16 = 0;
var block: Block = [_]u16{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
const gbp_prop: u8 = @intFromEnum(gbp_map.get(cp) orelse .none);
const indic_prop: u8 = @intFromEnum(indic_map.get(cp) orelse .none);
const emoji_prop: u1 = @intFromBool(emoji_set.contains(cp));
// Packed byte layout: bits 4-7 = GBP, bits 1-3 = InCB, bit 0 = emoji.
var props_byte: u8 = gbp_prop << 4;
props_byte |= indic_prop << 1;
props_byte |= emoji_prop;
// Intern the packed byte; getOrPut assigns the next stage3 index on first sight.
const stage3_idx = blk: {
const gop = try stage3.getOrPut(props_byte);
if (!gop.found_existing) {
gop.value_ptr.* = stage3_len;
stage3_len += 1;
}
break :blk gop.value_ptr.*;
};
block[block_len] = stage3_idx;
block_len += 1;
// Flush a block when full, or at the final code point (last block may be short).
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes the table is read back on the same architecture
// that generated it (build-time artifact) — TODO confirm with the reader side.
const endian = builtin.cpu.arch.endian();
// Output (raw deflate): stage1 len + entries, stage2 len + entries (u16 each),
// then stage3 len (u16) + the raw property bytes.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
for (stage2.items) |i| try writer.writeInt(u16, i, endian);
const props_bytes = stage3.keys();
try writer.writeInt(u16, @intCast(props_bytes.len), endian);
try writer.writeAll(props_bytes);
try out_comp.flush();
}

171
deps/zg/codegen/gencat.zig vendored Normal file
View File

@@ -0,0 +1,171 @@
const std = @import("std");
const builtin = @import("builtin");
// Unicode General Category, named by the standard two-letter abbreviations.
// Declaration order matters: @intFromEnum values are serialized (as u5 stage3
// indices) into the generated table, so variants must not be reordered.
const Gc = enum {
Cc, // Other, Control
Cf, // Other, Format
Cn, // Other, Unassigned
Co, // Other, Private Use
Cs, // Other, Surrogate
Ll, // Letter, Lowercase
Lm, // Letter, Modifier
Lo, // Letter, Other
Lu, // Letter, Uppercase
Lt, // Letter, Titlecase
Mc, // Mark, Spacing Combining
Me, // Mark, Enclosing
Mn, // Mark, Non-Spacing
Nd, // Number, Decimal Digit
Nl, // Number, Letter
No, // Number, Other
Pc, // Punctuation, Connector
Pd, // Punctuation, Dash
Pe, // Punctuation, Close
Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
Po, // Punctuation, Other
Ps, // Punctuation, Open
Sc, // Symbol, Currency
Sk, // Symbol, Modifier
Sm, // Symbol, Math
So, // Symbol, Other
Zl, // Separator, Line
Zp, // Separator, Paragraph
Zs, // Separator, Space
};
// The code space is chopped into 256-entry blocks; identical blocks are
// deduplicated through BlockMap (key = whole block, value = its offset in
// the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u5;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the entire fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return std.mem.eql(u5, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed General Category lookup table: parses
// data/unicode/extracted/DerivedGeneralCategory.txt into a cp -> Gc map,
// compresses it into three stages, and writes a raw-deflate stream to the
// output path given as the first CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u5).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process DerivedGeneralCategory.txt
var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// General category
const gc = std.meta.stringToEnum(Gc, field) orelse return error.UnknownGenCat;
for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(gc));
},
else => {},
}
}
}
// Three-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds deduplicated blocks of stage3 indices; stage3 holds the
// distinct category values seen.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u5).init(allocator);
defer stage2.deinit();
var stage3 = std.ArrayList(u5).init(allocator);
defer stage3.deinit();
var block: Block = [_]u5{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// `.?`: assumes DerivedGeneralCategory assigns a category to every code
// point (unassigned ones are listed as Cn) — TODO confirm for the
// vendored data snapshot; a gap here panics the generator.
const gc = flat_map.get(cp).?;
// Linear intern into stage3 (tiny: at most one slot per Gc variant).
const stage3_idx = blk: {
for (stage3.items, 0..) |gci, j| {
if (gc == gci) break :blk j;
}
try stage3.append(gc);
break :blk stage3.items.len - 1;
};
// Process block
block[block_len] = @intCast(stage3_idx);
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian: generator and consumer are assumed to run on the same
// machine during the build — TODO confirm with the reader side.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + entries (u8),
// stage3 len (u8) + entries (u8); all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
for (stage2.items) |i| try writer.writeInt(u8, i, endian);
try writer.writeInt(u8, @intCast(stage3.items.len), endian);
for (stage3.items) |i| try writer.writeInt(u8, i, endian);
try out_comp.flush();
}

133
deps/zg/codegen/hangul.zig vendored Normal file
View File

@@ -0,0 +1,133 @@
const std = @import("std");
const builtin = @import("builtin");
// Hangul_Syllable_Type values. `none` (0) is the default for code points not
// listed in HangulSyllableType.txt. Order matters: @intFromEnum values are
// stored as u3 in the generated table.
const Syllable = enum {
none,
L,
LV,
LVT,
V,
T,
};
// 256-entry blocks, deduplicated via BlockMap (key = block, value = offset
// of that block in the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u3;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the whole fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return std.mem.eql(u3, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed Hangul syllable-type table: parses
// data/unicode/HangulSyllableType.txt, compresses cp -> Syllable into two
// stages, and writes a raw-deflate stream to the output path given as the
// first CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u3).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process HangulSyllableType.txt
var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Syllable type
// Unknown type strings fall back to .none rather than erroring
// (unlike the other generators, which fail on unknown values).
const st: Syllable = std.meta.stringToEnum(Syllable, field) orelse .none;
for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(st));
},
else => {},
}
}
}
// Two-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds the deduplicated blocks of syllable-type values.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u3).init(allocator);
defer stage2.deinit();
var block: Block = [_]u3{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// Unlisted code points default to 0 (.none).
const st = flat_map.get(cp) orelse 0;
// Process block
block[block_len] = st;
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes the generated file is read on the same
// architecture — TODO confirm with the reader side.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + entries (one u8
// per u3 value); all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
for (stage2.items) |i| try writer.writeInt(u8, i, endian);
try out_comp.flush();
}

57
deps/zg/codegen/lower.zig vendored Normal file
View File

@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");
// Extracts Simple_Lowercase_Mapping (field 13) from UnicodeData.txt and
// writes (codepoint, delta) i24 pairs, terminated by a zero u24 sentinel,
// into a raw-deflate stream at the output path given as the first CLI arg.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native endian; assumes the table is read back on the same architecture.
    const endian = builtin.cpu.arch.endian();

    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // i24 is wide enough: code points max out at 0x10FFFF.
        var cp: i24 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),
                // Field 1 is the character Name; "<control>" and range markers
                // like "<CJK Ideograph, First>" are not real mapped characters.
                // BUGFIX: the original tested `line[0] == '<'` at field index 2
                // — the first byte of the line is always a hex digit, so the
                // skip never fired (harmless only because such entries also
                // have an empty mapping field).
                1 => if (field.len > 0 and field[0] == '<') continue :lines,
                13 => {
                    // Simple lowercase mapping: emit the code point and the
                    // signed delta to its lowercase form.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },
                else => {},
            }
        }
    }
    // Zero sentinel marks the end of the pair stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}

134
deps/zg/codegen/normp.zig vendored Normal file
View File

@@ -0,0 +1,134 @@
const std = @import("std");
const builtin = @import("builtin");
// 256-entry blocks of packed normalization-property bits, deduplicated via
// BlockMap (key = whole block, value = offset in the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u3;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the whole fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return std.mem.eql(u3, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed normalization-properties table: parses
// data/unicode/DerivedNormalizationProps.txt into per-cp bit flags
// (bit 0 = NFD_QC, bit 1 = NFKD_QC, bit 2 = Full_Composition_Exclusion),
// compresses them into two stages, and writes a raw-deflate stream to the
// output path given as the first CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u3).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process DerivedNormalizationProps.txt
var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Norm props
// OR the matching bit into each cp in the range; a cp can appear
// on several lines (one per property). Only the property name is
// inspected — the quick-check value field (N/M) is ignored.
for (current_code[0]..current_code[1] + 1) |cp| {
const gop = try flat_map.getOrPut(@intCast(cp));
if (!gop.found_existing) gop.value_ptr.* = 0;
if (std.mem.eql(u8, field, "NFD_QC")) {
gop.value_ptr.* |= 1;
} else if (std.mem.eql(u8, field, "NFKD_QC")) {
gop.value_ptr.* |= 2;
} else if (std.mem.eql(u8, field, "Full_Composition_Exclusion")) {
gop.value_ptr.* |= 4;
}
}
},
else => {},
}
}
}
// Two-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds the deduplicated blocks of property bytes.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u3).init(allocator);
defer stage2.deinit();
var block: Block = [_]u3{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// Unlisted code points default to 0 (no properties set).
const props = flat_map.get(cp) orelse 0;
// Process block
block[block_len] = props;
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes reader and generator share an architecture.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + entries (one u8
// per u3 value); all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
for (stage2.items) |i| try writer.writeInt(u8, i, endian);
try out_comp.flush();
}

135
deps/zg/codegen/numeric.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// 256-entry blocks of numeric-type bit flags, deduplicated via BlockMap
// (key = whole block, value = offset in the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u8;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the whole fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return mem.eql(u8, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed numeric-type table: parses
// data/unicode/extracted/DerivedNumericType.txt into per-cp bit flags
// (bit 0 = Numeric, bit 1 = Digit, bit 2 = Decimal), compresses them into
// two stages, and writes a raw-deflate stream to the output path given as
// the first CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u8).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process DerivedNumericType.txt
var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Numeric type
// Unknown type names leave bit == 0 and are silently skipped.
var bit: u8 = 0;
if (mem.eql(u8, field, "Numeric")) bit = 1;
if (mem.eql(u8, field, "Digit")) bit = 2;
if (mem.eql(u8, field, "Decimal")) bit = 4;
if (bit != 0) {
for (current_code[0]..current_code[1] + 1) |cp| {
const gop = try flat_map.getOrPut(@intCast(cp));
if (!gop.found_existing) gop.value_ptr.* = 0;
gop.value_ptr.* |= bit;
}
}
},
else => {},
}
}
}
// Two-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds the deduplicated blocks of flag bytes.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u8).init(allocator);
defer stage2.deinit();
var block: Block = [_]u8{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// Unlisted code points default to 0 (no numeric type).
const nt = flat_map.get(cp) orelse 0;
// Process block
block[block_len] = nt;
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes reader and generator share an architecture.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + raw bytes;
// all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
try writer.writeAll(stage2.items);
try out_comp.flush();
}

135
deps/zg/codegen/props.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// 256-entry blocks of core-property bit flags, deduplicated via BlockMap
// (key = whole block, value = offset in the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u8;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the whole fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return mem.eql(u8, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed core-properties table: parses
// data/unicode/PropList.txt into per-cp bit flags (bit 0 = White_Space,
// bit 1 = Hex_Digit, bit 2 = Diacritic), compresses them into two stages,
// and writes a raw-deflate stream to the output path given as the first
// CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u8).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process PropList.txt
var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Core property
// Only three of PropList's properties are kept; others skipped.
var bit: u8 = 0;
if (mem.eql(u8, field, "White_Space")) bit = 1;
if (mem.eql(u8, field, "Hex_Digit")) bit = 2;
if (mem.eql(u8, field, "Diacritic")) bit = 4;
if (bit != 0) {
for (current_code[0]..current_code[1] + 1) |cp| {
const gop = try flat_map.getOrPut(@intCast(cp));
if (!gop.found_existing) gop.value_ptr.* = 0;
gop.value_ptr.* |= bit;
}
}
},
else => {},
}
}
}
// Two-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds the deduplicated blocks of flag bytes.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u8).init(allocator);
defer stage2.deinit();
var block: Block = [_]u8{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// Unlisted code points default to 0 (no tracked properties).
const prop = flat_map.get(cp) orelse 0;
// Process block
block[block_len] = prop;
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes reader and generator share an architecture.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + raw bytes;
// all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
try writer.writeAll(stage2.items);
try out_comp.flush();
}

308
deps/zg/codegen/scripts.zig vendored Normal file
View File

@@ -0,0 +1,308 @@
const std = @import("std");
const builtin = @import("builtin");
// Unicode Script property values (from Scripts.txt), with `none` (0) for
// unassigned code points. Declaration order matters: @intFromEnum values are
// serialized into the generated table, so variants must not be reordered.
const Script = enum {
none,
Adlam,
Ahom,
Anatolian_Hieroglyphs,
Arabic,
Armenian,
Avestan,
Balinese,
Bamum,
Bassa_Vah,
Batak,
Bengali,
Bhaiksuki,
Bopomofo,
Brahmi,
Braille,
Buginese,
Buhid,
Canadian_Aboriginal,
Carian,
Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Chorasmian,
Common,
Coptic,
Cuneiform,
Cypriot,
Cypro_Minoan,
Cyrillic,
Deseret,
Devanagari,
Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
Elbasan,
Elymaic,
Ethiopic,
Georgian,
Glagolitic,
Gothic,
Grantha,
Greek,
Gujarati,
Gunjala_Gondi,
Gurmukhi,
Han,
Hangul,
Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
Hiragana,
Imperial_Aramaic,
Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada,
Katakana,
Kawi,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
Lao,
Latin,
Lepcha,
Limbu,
Linear_A,
Linear_B,
Lisu,
Lycian,
Lydian,
Mahajani,
Makasar,
Malayalam,
Mandaic,
Manichaean,
Marchen,
Masaram_Gondi,
Medefaidrin,
Meetei_Mayek,
Mende_Kikakui,
Meroitic_Cursive,
Meroitic_Hieroglyphs,
Miao,
Modi,
Mongolian,
Mro,
Multani,
Myanmar,
Nabataean,
Nag_Mundari,
Nandinagari,
New_Tai_Lue,
Newa,
Nko,
Nushu,
Nyiakeng_Puachue_Hmong,
Ogham,
Ol_Chiki,
Old_Hungarian,
Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Old_Uyghur,
Oriya,
Osage,
Osmanya,
Pahawh_Hmong,
Palmyrene,
Pau_Cin_Hau,
Phags_Pa,
Phoenician,
Psalter_Pahlavi,
Rejang,
Runic,
Samaritan,
Saurashtra,
Sharada,
Shavian,
Siddham,
SignWriting,
Sinhala,
Sogdian,
Sora_Sompeng,
Soyombo,
Sundanese,
Syloti_Nagri,
Syriac,
Tagalog,
Tagbanwa,
Tai_Le,
Tai_Tham,
Tai_Viet,
Takri,
Tamil,
Tangsa,
Tangut,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Tirhuta,
Toto,
Ugaritic,
Vai,
Vithkuqi,
Wancho,
Warang_Citi,
Yezidi,
Yi,
Zanabazar_Square,
};
// 256-entry blocks of stage3 indices, deduplicated via BlockMap
// (key = whole block, value = offset in the flattened stage2 array).
const block_size = 256;
const Block = [block_size]u8;
const BlockMap = std.HashMap(
Block,
u16,
struct {
// Hash the whole fixed-size array by value.
pub fn hash(_: @This(), k: Block) u64 {
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
return hasher.final();
}
pub fn eql(_: @This(), a: Block, b: Block) bool {
return std.mem.eql(u8, &a, &b);
}
},
std.hash_map.default_max_load_percentage,
);
// Builds the compressed Script table: parses data/unicode/Scripts.txt into a
// cp -> Script map, compresses it into three stages, and writes a raw-deflate
// stream to the output path given as the first CLI argument.
pub fn main() !void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();
var flat_map = std.AutoHashMap(u21, u8).init(allocator);
defer flat_map.deinit();
var line_buf: [4096]u8 = undefined;
// Process DerivedGeneralCategory.txt
// NOTE(review): the comment above is stale (copied from gencat.zig) —
// this generator actually reads Scripts.txt.
var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{});
defer in_file.close();
var in_buf = std.io.bufferedReader(in_file.reader());
const in_reader = in_buf.reader();
while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
if (line.len == 0 or line[0] == '#') continue;
// Strip a trailing "# ..." comment before tokenizing.
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
var current_code: [2]u21 = undefined;
var i: usize = 0;
while (field_iter.next()) |field| : (i += 1) {
switch (i) {
0 => {
// Code point(s)
if (std.mem.indexOf(u8, field, "..")) |dots| {
current_code = .{
try std.fmt.parseInt(u21, field[0..dots], 16),
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
};
} else {
const code = try std.fmt.parseInt(u21, field, 16);
current_code = .{ code, code };
}
},
1 => {
// Script
// Fail loudly on an unrecognized script name (e.g. after a UCD
// update adds a new script not yet in the enum).
const script = std.meta.stringToEnum(Script, field) orelse {
std.debug.print("Unknown script: {s}\n", .{field});
return error.UnknownScript;
};
for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(script));
},
else => {},
}
}
}
// Three-stage compression: stage1 maps block number -> stage2 offset;
// stage2 holds deduplicated blocks of stage3 indices; stage3 holds the
// distinct script values seen.
var blocks_map = BlockMap.init(allocator);
defer blocks_map.deinit();
var stage1 = std.ArrayList(u16).init(allocator);
defer stage1.deinit();
var stage2 = std.ArrayList(u8).init(allocator);
defer stage2.deinit();
var stage3 = std.ArrayList(u8).init(allocator);
defer stage3.deinit();
var block: Block = [_]u8{0} ** block_size;
var block_len: u16 = 0;
for (0..0x110000) |i| {
const cp: u21 = @intCast(i);
// Unlisted code points default to 0 (Script.none).
const script = flat_map.get(cp) orelse 0;
// Linear intern into stage3 (small: one slot per script seen).
const stage3_idx = blk: {
for (stage3.items, 0..) |script_i, j| {
if (script == script_i) break :blk j;
}
try stage3.append(script);
break :blk stage3.items.len - 1;
};
// Process block
block[block_len] = @intCast(stage3_idx);
block_len += 1;
// Flush when the block is full, or at the last code point.
if (block_len < block_size and cp != 0x10ffff) continue;
const gop = try blocks_map.getOrPut(block);
if (!gop.found_existing) {
gop.value_ptr.* = @intCast(stage2.items.len);
try stage2.appendSlice(&block);
}
try stage1.append(gop.value_ptr.*);
block_len = 0;
}
var args_iter = try std.process.argsWithAllocator(allocator);
defer args_iter.deinit();
_ = args_iter.skip();
const output_path = args_iter.next() orelse @panic("No output file arg!");
const compressor = std.compress.flate.deflate.compressor;
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
const writer = out_comp.writer();
// Native endian; assumes reader and generator share an architecture.
const endian = builtin.cpu.arch.endian();
// Output: stage1 len + entries (u16), stage2 len (u16) + entries (u8),
// stage3 len (u8) + entries (u8); all inside a raw deflate stream.
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
for (stage2.items) |i| try writer.writeInt(u8, i, endian);
try writer.writeInt(u8, @intCast(stage3.items.len), endian);
for (stage3.items) |i| try writer.writeInt(u8, i, endian);
try out_comp.flush();
}

57
deps/zg/codegen/upper.zig vendored Normal file
View File

@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");
// Extracts Simple_Uppercase_Mapping (field 12) from UnicodeData.txt and
// writes (codepoint, delta) i24 pairs, terminated by a zero u24 sentinel,
// into a raw-deflate stream at the output path given as the first CLI arg.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // Native endian; assumes the table is read back on the same architecture.
    const endian = builtin.cpu.arch.endian();

    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // i24 is wide enough: code points max out at 0x10FFFF.
        var cp: i24 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),
                // Field 1 is the character Name; "<control>" and range markers
                // like "<CJK Ideograph, First>" are not real mapped characters.
                // BUGFIX: the original tested `line[0] == '<'` at field index 2
                // — the first byte of the line is always a hex digit, so the
                // skip never fired (harmless only because such entries also
                // have an empty mapping field).
                1 => if (field.len > 0 and field[0] == '<') continue :lines,
                12 => {
                    // Simple uppercase mapping: emit the code point and the
                    // signed delta to its uppercase form.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },
                else => {},
            }
        }
    }
    // Zero sentinel marks the end of the pair stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}