init
I will never get tired of vendoring dependencies. ha ha. It is possible I am insane. I had to do a lot of pruning to get these not to be ridiculous (especially the unicode data, which had nearly 1 million lines of... stuff).
This commit is contained in:
67
deps/zg/codegen/canon.zig
vendored
Normal file
67
deps/zg/codegen/canon.zig
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
//! Code generator (vendored from zg): extracts canonical decomposition
//! mappings from data/unicode/UnicodeData.txt and writes them,
//! raw-deflate-compressed, to the file named by the first CLI argument.
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Multi-byte integers are emitted in the build machine's native byte order.
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        // UnicodeData.txt is semicolon-separated; fields are selected by index.
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] = source code point; cps[1..len] = its decomposition.
        var cps: [3]u24 = undefined;
        // Default record length 2 (source + singleton decomposition).
        var len: u8 = 2;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                // Field 0: the code point itself, in hex.
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                // Field 5: decomposition mapping.
                5 => {
                    // Not canonical. (Empty field = no decomposition; a leading
                    // '<' is a compatibility tag, which this tool skips.)
                    if (field.len == 0 or field[0] == '<') continue :lines;
                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                },

                // NOTE(review): this tests line[0] — the first character of the
                // whole line, normally a hex digit — not field[0], so the branch
                // looks unreachable on well-formed input. Confirm intent upstream.
                2 => if (line[0] == '<') continue :lines,

                else => {},
            }
        }

        // Record format: 1-byte count, then `len` code points as u24.
        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    // A zero u16 terminates the data stream.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}
|
135
deps/zg/codegen/case_prop.zig
vendored
Normal file
135
deps/zg/codegen/case_prop.zig
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
//! Code generator (vendored from zg): builds a two-stage lookup table of
//! case properties (Lowercase/Uppercase/Cased bit flags) from
//! data/unicode/DerivedCoreProperties.txt and writes it,
//! raw-deflate-compressed, to the file named by the first CLI argument.
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

// Stage-2 table is deduplicated in blocks of 256 property bytes.
const block_size = 256;
const Block = [block_size]u8;

// Maps a 256-byte block to its offset in the stage-2 array, deduplicating
// identical blocks. Arrays need a custom hash/eql context.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> property bit set (1=Lowercase, 2=Uppercase, 4=Cased).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing "# ..." comment, if any.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "<cp or range> ; <property>".
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [first, last] code point range for the current line.
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Props
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Lowercase")) bit = 1;
                    if (mem.eql(u8, field, "Uppercase")) bit = 2;
                    if (mem.eql(u8, field, "Cased")) bit = 4;

                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Compress the flat map into stage1 (block indices) + stage2 (unique blocks).
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    // Walk the entire code space; flush a block every 256 code points
    // (and once more at the final code point 0x10ffff).
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        // Deduplicate: only a previously unseen block is appended to stage2.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Layout: u16 stage1 length, stage1 as u16s, u16 stage2 length, stage2 bytes.
    // Native byte order of the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
|
124
deps/zg/codegen/ccc.zig
vendored
Normal file
124
deps/zg/codegen/ccc.zig
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
//! Code generator (vendored from zg): builds a two-stage lookup table of
//! canonical combining class values from
//! data/unicode/extracted/DerivedCombiningClass.txt and writes it,
//! raw-deflate-compressed, to the file named by the first CLI argument.
const std = @import("std");
const builtin = @import("builtin");

// Stage-2 table is deduplicated in blocks of 256 bytes.
const block_size = 256;
const Block = [block_size]u8;

// Maps a 256-byte block to its offset in the stage-2 array, deduplicating
// identical blocks. Arrays need a custom hash/eql context.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> combining class (only nonzero classes are stored).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCombiningClass.txt
    var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{});
    defer cc_file.close();
    var cc_buf = std.io.bufferedReader(cc_file.reader());
    const cc_reader = cc_buf.reader();

    while (try cc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing "# ..." comment, if any.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "<cp or range> ; <combining class>".
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [first, last] code point range for the current line.
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Combining Class
                    // Class 0 is the default; don't store it.
                    if (std.mem.eql(u8, field, "0")) continue;
                    const cc = try std.fmt.parseInt(u8, field, 10);
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), cc);
                },
                else => {},
            }
        }
    }

    // Compress the flat map into stage1 (block indices) + stage2 (unique blocks).
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    // Walk the entire code space; flush a block every 256 code points
    // (and once more at the final code point 0x10ffff).
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const cc = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = cc;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        // Deduplicate: only a previously unseen block is appended to stage2.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Layout: u16 stage1 length, stage1 as u16s, u16 stage2 length, stage2 bytes.
    // Native byte order of the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
|
64
deps/zg/codegen/compat.zig
vendored
Normal file
64
deps/zg/codegen/compat.zig
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
//! Code generator (vendored from zg): extracts compatibility decomposition
//! mappings from data/unicode/UnicodeData.txt and writes them,
//! raw-deflate-compressed, to the file named by the first CLI argument.
//! Counterpart to canon.zig, which handles canonical decompositions.
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Multi-byte integers are emitted in the build machine's native byte order.
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        // UnicodeData.txt is semicolon-separated; fields are selected by index.
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] = source code point; cps[1..len] = decomposition (up to 18 cps).
        var cps: [19]u24 = undefined;
        // len starts at 1 (source only) and grows per decomposition code point.
        var len: u8 = 1;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                // Field 0: the code point itself, in hex.
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                // Field 5: decomposition mapping.
                5 => {
                    // Not compatibility. (Only fields with a leading '<tag>'
                    // are compatibility decompositions.)
                    if (field.len == 0 or field[0] != '<') continue :lines;
                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // <compat type>

                    // Remaining tokens are the decomposition code points in hex.
                    while (cp_iter.next()) |cp_str| : (len += 1) {
                        cps[len] = try std.fmt.parseInt(u24, cp_str, 16);
                    }
                },

                // NOTE(review): this tests line[0] — the first character of the
                // whole line, normally a hex digit — not field[0], so the branch
                // looks unreachable on well-formed input. Confirm intent upstream.
                2 => if (line[0] == '<') continue :lines,

                else => {},
            }
        }

        // Record format: 1-byte count, then `len` code points as u24.
        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    // A zero u16 terminates the data stream.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}
|
138
deps/zg/codegen/core_props.zig
vendored
Normal file
138
deps/zg/codegen/core_props.zig
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
//! Code generator (vendored from zg): builds a two-stage lookup table of core
//! property bit flags (Math, Alphabetic, ID_/XID_Start/Continue) from
//! data/unicode/DerivedCoreProperties.txt and writes it,
//! raw-deflate-compressed, to the file named by the first CLI argument.
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

// Stage-2 table is deduplicated in blocks of 256 property bytes.
const block_size = 256;
const Block = [block_size]u8;

// Maps a 256-byte block to its offset in the stage-2 array, deduplicating
// identical blocks. Arrays need a custom hash/eql context.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> property bit set (see bit assignments below).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing "# ..." comment, if any.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "<cp or range> ; <property>".
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [first, last] code point range for the current line.
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Math")) bit = 1;
                    if (mem.eql(u8, field, "Alphabetic")) bit = 2;
                    if (mem.eql(u8, field, "ID_Start")) bit = 4;
                    if (mem.eql(u8, field, "ID_Continue")) bit = 8;
                    if (mem.eql(u8, field, "XID_Start")) bit = 16;
                    if (mem.eql(u8, field, "XID_Continue")) bit = 32;

                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Compress the flat map into stage1 (block indices) + stage2 (unique blocks).
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    // Walk the entire code space; flush a block every 256 code points
    // (and once more at the final code point 0x10ffff).
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        // Deduplicate: only a previously unseen block is appended to stage2.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Layout: u16 stage1 length, stage1 as u16s, u16 stage2 length, stage2 bytes.
    // Native byte order of the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
|
244
deps/zg/codegen/dwp.zig
vendored
Normal file
244
deps/zg/codegen/dwp.zig
vendored
Normal file
@@ -0,0 +1,244 @@
|
||||
//! Code generator (vendored from zg): builds a two-stage lookup table of
//! display widths (-1..3, stored as i3) from the derived East Asian Width and
//! General Category files, applies hard-coded overrides, and writes the table,
//! raw-deflate-compressed, to the file named by the first CLI argument.
const std = @import("std");
const builtin = @import("builtin");

// Build-generated options module; `options.cjk` toggles whether Ambiguous ("A")
// East Asian Width is treated as wide. Presumably injected via build.zig —
// not visible here.
const options = @import("options");

// Stage-2 table is deduplicated in blocks of 256 width values.
const block_size = 256;
const Block = [block_size]i3;

// Maps a 256-entry block to its offset in the stage-2 array, deduplicating
// identical blocks. Arrays need a custom hash/eql context.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(i3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    // Arena-backed allocation: everything is freed in one shot at exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> width override gathered from the data files
    // (2 = wide, 0 = zero-width; default width 1 is applied later).
    var flat_map = std.AutoHashMap(u21, i3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedEastAsianWidth.txt
    var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{});
    defer deaw_file.close();
    var deaw_buf = std.io.bufferedReader(deaw_file.reader());
    const deaw_reader = deaw_buf.reader();

    while (try deaw_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        // @missing ranges
        // Lines of the form "# @missing: XXXX..YYYY; ..." declare defaults for
        // otherwise-unlisted ranges; those ranges are marked wide (2), except
        // the whole-plane default 0000..10FFFF, which is ignored.
        if (std.mem.startsWith(u8, line, "# @missing: ")) {
            const semi = std.mem.indexOfScalar(u8, line, ';').?;
            const field = line[12..semi];
            const dots = std.mem.indexOf(u8, field, "..").?;
            const from = try std.fmt.parseInt(u21, field[0..dots], 16);
            const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
            if (from == 0 and to == 0x10ffff) continue;
            for (from..to + 1) |cp| try flat_map.put(@intCast(cp), 2);
            continue;
        }

        if (line[0] == '#') continue;

        // Strip a trailing "# ..." comment, if any.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "<cp or range> ; <width class>".
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [first, last] code point range for the current line.
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Width
                    // Wide (W), Fullwidth (F), and — when the cjk build option
                    // is set — Ambiguous (A) all map to width 2.
                    if (std.mem.eql(u8, field, "W") or
                        std.mem.eql(u8, field, "F") or
                        (options.cjk and std.mem.eql(u8, field, "A")))
                    {
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 2);
                    }
                },
                else => {},
            }
        }
    }

    // Process DerivedGeneralCategory.txt
    var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer dgc_file.close();
    var dgc_buf = std.io.bufferedReader(dgc_file.reader());
    const dgc_reader = dgc_buf.reader();

    while (try dgc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing "# ..." comment, if any.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    // Marks and (non-Arabic) format characters render zero-width.
                    if (std.mem.eql(u8, field, "Mn")) {
                        // Nonspacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Me")) {
                        // Enclosing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Mc")) {
                        // Spacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Cf")) {
                        if (std.mem.indexOf(u8, line, "ARABIC") == null) {
                            // Format except Arabic
                            for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Compress the flat map into stage1 (block indices) + stage2 (unique blocks).
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(i3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]i3{0} ** block_size;
    var block_len: u16 = 0;

    // Walk the entire code space; flush a block every 256 code points
    // (and once more at the final code point 0x10ffff).
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        // Default width is 1 unless the data files said otherwise.
        var width = flat_map.get(cp) orelse 1;

        // Specific overrides
        switch (cp) {
            // Three-em dash
            0x2e3b => width = 3,

            // C0/C1 control codes
            0...0x20,
            0x80...0xa0,

            // Line separator
            0x2028,

            // Paragraph separator
            0x2029,

            // Hangul syllable and ignorable.
            0x1160...0x11ff,
            0xd7b0...0xd7ff,
            0x2060...0x206f,
            0xfff0...0xfff8,
            0xe0000...0xE0fff,
            => width = 0,

            // Two-em dash
            0x2e3a,

            // Regional indicators
            0x1f1e6...0x1f200,

            // CJK Blocks
            0x3400...0x4dbf, // CJK Unified Ideographs Extension A
            0x4e00...0x9fff, // CJK Unified Ideographs
            0xf900...0xfaff, // CJK Compatibility Ideographs
            0x20000...0x2fffd, // Plane 2
            0x30000...0x3fffd, // Plane 3
            => width = 2,

            else => {},
        }

        // ASCII
        // (Re-asserts width 1 for printable ASCII; the 0...0x20 prong above
        // includes 0x20, so space is restored here.)
        if (0x20 <= cp and cp < 0x7f) width = 1;

        // Soft hyphen
        if (cp == 0xad) width = 1;

        // Backspace and delete
        if (cp == 0x8 or cp == 0x7f) width = -1;

        // Process block
        block[block_len] = width;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        // Deduplicate: only a previously unseen block is appended to stage2.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Output goes through a raw (headerless) deflate stream at best compression.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Layout: u16 stage1 length, stage1 as u16s, u16 stage2 length,
    // stage2 widths widened to i8. Native byte order of the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(i8, i, endian);

    try out_comp.flush();
}
|
252
deps/zg/codegen/fold.zig
vendored
Normal file
252
deps/zg/codegen/fold.zig
vendored
Normal file
@@ -0,0 +1,252 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const mem = std.mem;
|
||||
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer std.debug.assert(gpa.deinit() == .ok);
|
||||
const allocator = gpa.allocator();
|
||||
|
||||
// Process DerivedCoreProperties.txt
|
||||
var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
|
||||
defer props_file.close();
|
||||
var props_buf = std.io.bufferedReader(props_file.reader());
|
||||
const props_reader = props_buf.reader();
|
||||
|
||||
var props_map = std.AutoHashMap(u21, void).init(allocator);
|
||||
defer props_map.deinit();
|
||||
|
||||
var line_buf: [4096]u8 = undefined;
|
||||
|
||||
props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
|
||||
if (line.len == 0 or line[0] == '#') continue;
|
||||
|
||||
const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
|
||||
|
||||
var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
|
||||
var current_code: [2]u21 = undefined;
|
||||
|
||||
var i: usize = 0;
|
||||
while (field_iter.next()) |field| : (i += 1) {
|
||||
switch (i) {
|
||||
0 => {
|
||||
// Code point(s)
|
||||
if (std.mem.indexOf(u8, field, "..")) |dots| {
|
||||
current_code = .{
|
||||
try std.fmt.parseInt(u21, field[0..dots], 16),
|
||||
try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
|
||||
};
|
||||
} else {
|
||||
const code = try std.fmt.parseInt(u21, field, 16);
|
||||
current_code = .{ code, code };
|
||||
}
|
||||
},
|
||||
1 => {
|
||||
// Core property
|
||||
if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines;
|
||||
for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {});
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
|
||||
defer codepoint_mapping.deinit();
|
||||
|
||||
// Process CaseFolding.txt
|
||||
var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
|
||||
defer cp_file.close();
|
||||
var cp_buf = std.io.bufferedReader(cp_file.reader());
|
||||
const cp_reader = cp_buf.reader();
|
||||
|
||||
while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
|
||||
if (line.len == 0 or line[0] == '#') continue;
|
||||
|
||||
var field_it = std.mem.splitScalar(u8, line, ';');
|
||||
const codepoint_str = field_it.first();
|
||||
const codepoint = try std.fmt.parseUnsigned(u21, codepoint_str, 16);
|
||||
|
||||
const status = std.mem.trim(u8, field_it.next() orelse continue, " ");
|
||||
// Only interested in 'common' and 'full'
|
||||
if (status[0] != 'C' and status[0] != 'F') continue;
|
||||
|
||||
const mapping = std.mem.trim(u8, field_it.next() orelse continue, " ");
|
||||
var mapping_it = std.mem.splitScalar(u8, mapping, ' ');
|
||||
var mapping_buf = [_]u21{0} ** 3;
|
||||
var mapping_i: u8 = 0;
|
||||
while (mapping_it.next()) |mapping_c| {
|
||||
mapping_buf[mapping_i] = try std.fmt.parseInt(u21, mapping_c, 16);
|
||||
mapping_i += 1;
|
||||
}
|
||||
|
||||
try codepoint_mapping.putNoClobber(codepoint, mapping_buf);
|
||||
}
|
||||
|
||||
var changes_when_casefolded_exceptions = std.ArrayList(u21).init(allocator);
|
||||
defer changes_when_casefolded_exceptions.deinit();
|
||||
|
||||
{
|
||||
// Codepoints with a case fold mapping can be missing the Changes_When_Casefolded property,
|
||||
// but not vice versa.
|
||||
for (codepoint_mapping.keys()) |codepoint| {
|
||||
if (props_map.get(codepoint) == null) {
|
||||
try changes_when_casefolded_exceptions.append(codepoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var offset_to_index = std.AutoHashMap(i32, u8).init(allocator);
|
||||
defer offset_to_index.deinit();
|
||||
var unique_offsets = std.AutoArrayHashMap(i32, u32).init(allocator);
|
||||
defer unique_offsets.deinit();
|
||||
|
||||
// First pass
|
||||
{
|
||||
var it = codepoint_mapping.iterator();
|
||||
while (it.next()) |entry| {
|
||||
const codepoint = entry.key_ptr.*;
|
||||
const mappings = std.mem.sliceTo(entry.value_ptr, 0);
|
||||
if (mappings.len == 1) {
|
||||
const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
|
||||
const result = try unique_offsets.getOrPut(offset);
|
||||
if (!result.found_existing) result.value_ptr.* = 0;
|
||||
result.value_ptr.* += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// A codepoint mapping to itself (offset=0) is the most common case
|
||||
try unique_offsets.put(0, 0x10FFFF);
|
||||
const C = struct {
|
||||
vals: []u32,
|
||||
|
||||
pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
|
||||
return ctx.vals[a_index] > ctx.vals[b_index];
|
||||
}
|
||||
};
|
||||
unique_offsets.sort(C{ .vals = unique_offsets.values() });
|
||||
|
||||
var offset_it = unique_offsets.iterator();
|
||||
var offset_index: u7 = 0;
|
||||
while (offset_it.next()) |entry| {
|
||||
try offset_to_index.put(entry.key_ptr.*, offset_index);
|
||||
offset_index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
var mappings_to_index = std.AutoArrayHashMap([3]u21, u8).init(allocator);
|
||||
defer mappings_to_index.deinit();
|
||||
var codepoint_to_index = std.AutoHashMap(u21, u8).init(allocator);
|
||||
defer codepoint_to_index.deinit();
|
||||
|
||||
// Second pass
|
||||
{
|
||||
var count_multiple_codepoints: u8 = 0;
|
||||
|
||||
var it = codepoint_mapping.iterator();
|
||||
while (it.next()) |entry| {
|
||||
const codepoint = entry.key_ptr.*;
|
||||
const mappings = std.mem.sliceTo(entry.value_ptr, 0);
|
||||
if (mappings.len > 1) {
|
||||
const result = try mappings_to_index.getOrPut(entry.value_ptr.*);
|
||||
if (!result.found_existing) {
|
||||
result.value_ptr.* = 0x80 | count_multiple_codepoints;
|
||||
count_multiple_codepoints += 1;
|
||||
}
|
||||
const index = result.value_ptr.*;
|
||||
try codepoint_to_index.put(codepoint, index);
|
||||
} else {
|
||||
const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
|
||||
const index = offset_to_index.get(offset).?;
|
||||
try codepoint_to_index.put(codepoint, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build the stage1/stage2/stage3 arrays and output them
|
||||
{
|
||||
const Block = [256]u8;
|
||||
var stage2_blocks = std.AutoArrayHashMap(Block, void).init(allocator);
|
||||
defer stage2_blocks.deinit();
|
||||
|
||||
const empty_block: Block = [_]u8{0} ** 256;
|
||||
try stage2_blocks.put(empty_block, {});
|
||||
const stage1_len = (0x10FFFF / 256) + 1;
|
||||
var stage1: [stage1_len]u8 = undefined;
|
||||
|
||||
var codepoint: u21 = 0;
|
||||
var block: Block = undefined;
|
||||
while (codepoint <= 0x10FFFF) {
|
||||
const data_index = codepoint_to_index.get(codepoint) orelse 0;
|
||||
block[codepoint % 256] = data_index;
|
||||
|
||||
codepoint += 1;
|
||||
if (codepoint % 256 == 0) {
|
||||
const result = try stage2_blocks.getOrPut(block);
|
||||
const index = result.index;
|
||||
stage1[(codepoint >> 8) - 1] = @intCast(index);
|
||||
}
|
||||
}
|
||||
|
||||
const last_meaningful_block = std.mem.lastIndexOfNone(u8, &stage1, "\x00").?;
|
||||
const meaningful_stage1 = stage1[0 .. last_meaningful_block + 1];
|
||||
const codepoint_cutoff = (last_meaningful_block + 1) << 8;
|
||||
const multiple_codepoint_start: usize = unique_offsets.count();
|
||||
|
||||
var index: usize = 0;
|
||||
const stage3_elems = unique_offsets.count() + mappings_to_index.count() * 3;
|
||||
var stage3 = try allocator.alloc(i24, stage3_elems);
|
||||
defer allocator.free(stage3);
|
||||
for (unique_offsets.keys()) |key| {
|
||||
stage3[index] = @intCast(key);
|
||||
index += 1;
|
||||
}
|
||||
for (mappings_to_index.keys()) |key| {
|
||||
stage3[index] = @intCast(key[0]);
|
||||
stage3[index + 1] = @intCast(key[1]);
|
||||
stage3[index + 2] = @intCast(key[2]);
|
||||
index += 3;
|
||||
}
|
||||
|
||||
const stage2_elems = stage2_blocks.count() * 256;
|
||||
var stage2 = try allocator.alloc(u8, stage2_elems);
|
||||
defer allocator.free(stage2);
|
||||
for (stage2_blocks.keys(), 0..) |key, i| {
|
||||
@memcpy(stage2[i * 256 ..][0..256], &key);
|
||||
}
|
||||
|
||||
// Write out compressed binary data file.
|
||||
var args_iter = try std.process.argsWithAllocator(allocator);
|
||||
defer args_iter.deinit();
|
||||
_ = args_iter.skip();
|
||||
const output_path = args_iter.next() orelse @panic("No output file arg!");
|
||||
|
||||
const compressor = std.compress.flate.deflate.compressor;
|
||||
var out_file = try std.fs.cwd().createFile(output_path, .{});
|
||||
defer out_file.close();
|
||||
var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
|
||||
const writer = out_comp.writer();
|
||||
|
||||
const endian = builtin.cpu.arch.endian();
|
||||
// Table metadata.
|
||||
try writer.writeInt(u24, @intCast(codepoint_cutoff), endian);
|
||||
try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian);
|
||||
// Stage 1
|
||||
try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian);
|
||||
try writer.writeAll(meaningful_stage1);
|
||||
// Stage 2
|
||||
try writer.writeInt(u16, @intCast(stage2.len), endian);
|
||||
try writer.writeAll(stage2);
|
||||
// Stage 3
|
||||
try writer.writeInt(u16, @intCast(stage3.len), endian);
|
||||
for (stage3) |offset| try writer.writeInt(i24, offset, endian);
|
||||
// Changes when case folded
|
||||
// Min and max
|
||||
try writer.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
|
||||
try writer.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
|
||||
try writer.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
|
||||
for (changes_when_casefolded_exceptions.items) |cp| try writer.writeInt(u24, cp, endian);
|
||||
|
||||
try out_comp.flush();
|
||||
}
|
||||
}
|
248
deps/zg/codegen/gbp.zig
vendored
Normal file
248
deps/zg/codegen/gbp.zig
vendored
Normal file
@@ -0,0 +1,248 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Indic_Conjunct_Break (InCB) property values parsed from
/// DerivedCoreProperties.txt. The integer tag values are serialized into
/// the generated data file (packed into bits 1-2 of the props byte built
/// in `main`), so the declaration order must not change.
const Indic = enum {
    none,

    Consonant,
    Extend,
    Linker,
};
/// Grapheme_Cluster_Break property values parsed from
/// auxiliary/GraphemeBreakProperty.txt. The integer tag values are
/// serialized into the generated data file (bits 4-7 of the props byte
/// built in `main`), so the declaration order must not change.
const Gbp = enum {
    none,

    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};
// The multi-stage lookup table is built from fixed-size runs of 256
// codepoints; each element of a block is an index into the stage-3 table.
const block_size = 256;
const Block = [block_size]u16;
/// Map used to deduplicate identical 256-entry blocks while building the
/// two-level (stage1/stage2) index. Keys are whole blocks, compared
/// element-wise; values are a block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            // DeepRecursive hashes the array's elements, matching the
            // element-wise eql below.
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u16, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Generates the compressed grapheme-break property table.
///
/// Reads three Unicode data files relative to the CWD:
///   - data/unicode/DerivedCoreProperties.txt (InCB lines only)
///   - data/unicode/auxiliary/GraphemeBreakProperty.txt
///   - data/unicode/emoji/emoji-data.txt (Extended_Pictographic lines only)
/// and writes a raw-deflate-compressed three-stage lookup table to the
/// output path given as the first command line argument.
///
/// Per-codepoint props byte layout: bits 4-7 = Gbp, bits 1-3 = Indic,
/// bit 0 = Extended_Pictographic.
pub fn main() !void {
    // Arena: everything is freed at once on exit; the individual deinit
    // calls below are therefore belt-and-braces only.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var indic_map = std.AutoHashMap(u21, Indic).init(allocator);
    defer indic_map.deinit();

    var gbp_map = std.AutoHashMap(u21, Gbp).init(allocator);
    defer gbp_map.deinit();

    var emoji_set = std.AutoHashMap(u21, void).init(allocator);
    defer emoji_set.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process Indic_Conjunct_Break lines of DerivedCoreProperties.txt.
    var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer indic_file.close();
    var indic_buf = std.io.bufferedReader(indic_file.reader());
    const indic_reader = indic_buf.reader();

    while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Only InCB property lines are relevant here.
        if (std.mem.indexOf(u8, line, "InCB") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" (inclusive range)
                    // or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                2 => {
                    // Field 2 is the InCB value (field 1 is "InCB" itself).
                    const prop = std.meta.stringToEnum(Indic, field) orelse return error.InvalidProp;
                    for (current_code[0]..current_code[1] + 1) |cp| try indic_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }

    // Process GraphemeBreakProperty.txt.
    var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
    defer gbp_file.close();
    var gbp_buf = std.io.bufferedReader(gbp_file.reader());
    const gbp_reader = gbp_buf.reader();

    while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Grapheme_Cluster_Break value.
                    const prop = std.meta.stringToEnum(Gbp, field) orelse return error.InvalidProp;
                    for (current_code[0]..current_code[1] + 1) |cp| try gbp_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }

    // Process Extended_Pictographic lines of emoji-data.txt.
    var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
    defer emoji_file.close();
    var emoji_buf = std.io.bufferedReader(emoji_file.reader());
    const emoji_reader = emoji_buf.reader();

    while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        const from = try std.fmt.parseInt(u21, field[0..dots], 16);
                        const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
                        for (from..to + 1) |cp| try emoji_set.put(@intCast(cp), {});
                    } else {
                        const cp = try std.fmt.parseInt(u21, field, 16);
                        try emoji_set.put(@intCast(cp), {});
                    }
                },
                else => {},
            }
        }
    }

    // Build the three-stage table:
    //   stage1: one u16 per 256-codepoint block -> offset into stage2
    //   stage2: deduplicated blocks of u16 indexes -> stage3
    //   stage3: deduplicated props bytes
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u16).init(allocator);
    defer stage2.deinit();

    var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
    defer stage3.deinit();
    var stage3_len: u16 = 0;

    var block: Block = [_]u16{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const gbp_prop: u8 = @intFromEnum(gbp_map.get(cp) orelse .none);
        const indic_prop: u8 = @intFromEnum(indic_map.get(cp) orelse .none);
        const emoji_prop: u1 = @intFromBool(emoji_set.contains(cp));
        // Pack: Gbp in the high nibble, Indic in bits 1-3, emoji in bit 0.
        var props_byte: u8 = gbp_prop << 4;
        props_byte |= indic_prop << 1;
        props_byte |= emoji_prop;

        // Deduplicate props bytes; index assignment follows first-seen order.
        const stage3_idx = blk: {
            const gop = try stage3.getOrPut(props_byte);
            if (!gop.found_existing) {
                gop.value_ptr.* = stage3_len;
                stage3_len += 1;
            }

            break :blk gop.value_ptr.*;
        };

        block[block_len] = stage3_idx;
        block_len += 1;

        // Flush the block when full (or at the final codepoint).
        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Write the raw-deflate-compressed binary data file.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Data is written in native endianness; the consumer must match.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u16, i, endian);

    const props_bytes = stage3.keys();
    try writer.writeInt(u16, @intCast(props_bytes.len), endian);
    try writer.writeAll(props_bytes);

    try out_comp.flush();
}
|
171
deps/zg/codegen/gencat.zig
vendored
Normal file
171
deps/zg/codegen/gencat.zig
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Unicode General_Category values, parsed from
/// extracted/DerivedGeneralCategory.txt. The integer tag values (u5) are
/// serialized into the generated data file, so the declaration order must
/// not change.
const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};
// Stage-2 blocks cover 256 codepoints each; entries are u5 indexes into
// the deduplicated general-category (stage-3) list.
const block_size = 256;
const Block = [block_size]u5;
/// Map used to deduplicate identical 256-entry blocks while building the
/// two-level (stage1/stage2) index. Keys are whole blocks, compared
/// element-wise; values are a block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            // DeepRecursive hashes the array's elements, matching the
            // element-wise eql below.
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u5, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Generates the compressed general-category table.
///
/// Reads data/unicode/extracted/DerivedGeneralCategory.txt (relative to
/// the CWD) and writes a raw-deflate-compressed three-stage lookup table
/// to the output path given as the first command line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit; the individual deinit
    // calls below are therefore belt-and-braces only.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // codepoint -> general category tag (@intFromEnum(Gc)).
    var flat_map = std.AutoHashMap(u21, u5).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedGeneralCategory.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        // Strip trailing comment.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" (inclusive range)
                    // or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    const gc = std.meta.stringToEnum(Gc, field) orelse return error.UnknownGenCat;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(gc));
                },
                else => {},
            }
        }
    }

    // Build the three-stage table:
    //   stage1: one u16 per 256-codepoint block -> offset into stage2
    //   stage2: deduplicated blocks of u5 indexes -> stage3
    //   stage3: deduplicated category tags
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u5).init(allocator);
    defer stage2.deinit();

    var stage3 = std.ArrayList(u5).init(allocator);
    defer stage3.deinit();

    var block: Block = [_]u5{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        // `.?` is safe here only if DerivedGeneralCategory.txt covers every
        // codepoint (unassigned ones are listed as Cn); a gap would panic.
        const gc = flat_map.get(cp).?;

        // Linear scan is fine: stage3 holds at most 30 distinct categories.
        const stage3_idx = blk: {
            for (stage3.items, 0..) |gci, j| {
                if (gc == gci) break :blk j;
            }
            try stage3.append(gc);
            break :blk stage3.items.len - 1;
        };

        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;

        // Flush the block when full (or at the final codepoint).
        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Write the raw-deflate-compressed binary data file. Data is in native
    // endianness; the consumer must match.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
|
133
deps/zg/codegen/hangul.zig
vendored
Normal file
133
deps/zg/codegen/hangul.zig
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Hangul_Syllable_Type values parsed from HangulSyllableType.txt. The
/// integer tag values (u3) are serialized into the generated data file,
/// so the declaration order must not change.
const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};
// Stage-2 blocks cover 256 codepoints each; entries are u3 syllable-type
// tags.
const block_size = 256;
const Block = [block_size]u3;
/// Map used to deduplicate identical 256-entry blocks while building the
/// two-level (stage1/stage2) index. Keys are whole blocks, compared
/// element-wise; values are a block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            // DeepRecursive hashes the array's elements, matching the
            // element-wise eql below.
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Generates the compressed Hangul syllable-type table.
///
/// Reads data/unicode/HangulSyllableType.txt (relative to the CWD) and
/// writes a raw-deflate-compressed two-stage lookup table to the output
/// path given as the first command line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit; the individual deinit
    // calls below are therefore belt-and-braces only.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // codepoint -> syllable type tag (@intFromEnum(Syllable)).
    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process HangulSyllableType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        // Strip trailing comment.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" (inclusive range)
                    // or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Syllable type; unknown names silently map to .none.
                    const st: Syllable = std.meta.stringToEnum(Syllable, field) orelse .none;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(st));
                },
                else => {},
            }
        }
    }

    // Build the two-stage table:
    //   stage1: one u16 per 256-codepoint block -> offset into stage2
    //   stage2: deduplicated blocks of u3 syllable-type tags
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        // Codepoints absent from the data file are not Hangul (.none = 0).
        const st = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = st;
        block_len += 1;

        // Flush the block when full (or at the final codepoint).
        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Write the raw-deflate-compressed binary data file. Data is in native
    // endianness; the consumer must match.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
|
57
deps/zg/codegen/lower.zig
vendored
Normal file
57
deps/zg/codegen/lower.zig
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Generates the compressed simple-lowercase-mapping data.
///
/// Reads data/unicode/UnicodeData.txt (relative to the CWD) and writes a
/// raw-deflate-compressed stream of (codepoint, mapping - codepoint) i24
/// pairs, terminated by a zero u24, to the output path given as the first
/// command line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Raw-deflate compressor wrapping the output file.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // Data is written in native endianness; the consumer must match.
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        // UnicodeData.txt fields: 0 = codepoint, 13 = Simple_Lowercase_Mapping.
        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),

                // NOTE(review): this tests the first byte of the whole LINE
                // (always a hex digit), not of field 2, so it never fires.
                // Presumably `field[0] == '<'` was intended (cf. canon.zig,
                // which checks `field[0]`); changing it would alter the
                // generated data, so verify against the consumer first.
                2 => if (line[0] == '<') continue :lines,

                13 => {
                    // Simple lowercase mapping; empty field = no mapping.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    // Store the delta, not the absolute target codepoint.
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },

                else => {},
            }
        }
    }

    // Zero codepoint terminates the stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}
|
134
deps/zg/codegen/normp.zig
vendored
Normal file
134
deps/zg/codegen/normp.zig
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
// Stage-2 blocks cover 256 codepoints each; entries are 3-bit
// normalization-property bitsets.
const block_size = 256;
const Block = [block_size]u3;
/// Map used to deduplicate identical 256-entry blocks while building the
/// two-level (stage1/stage2) index. Keys are whole blocks, compared
/// element-wise; values are a block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            // DeepRecursive hashes the array's elements, matching the
            // element-wise eql below.
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Generates the compressed normalization-properties table.
///
/// Reads data/unicode/DerivedNormalizationProps.txt (relative to the CWD)
/// and writes a raw-deflate-compressed two-stage lookup table to the
/// output path given as the first command line argument. Each codepoint's
/// value is a bitset: 1 = NFD_QC, 2 = NFKD_QC, 4 = Full_Composition_Exclusion.
pub fn main() !void {
    // Arena: everything is freed at once on exit; the individual deinit
    // calls below are therefore belt-and-braces only.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // codepoint -> property bitset.
    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedNormalizationProps.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        // Strip trailing comment.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" (inclusive range)
                    // or a single "XXXX".
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Norm props: OR the relevant bit into each codepoint's
                    // bitset; properties not listed here are ignored.
                    for (current_code[0]..current_code[1] + 1) |cp| {
                        const gop = try flat_map.getOrPut(@intCast(cp));
                        if (!gop.found_existing) gop.value_ptr.* = 0;

                        if (std.mem.eql(u8, field, "NFD_QC")) {
                            gop.value_ptr.* |= 1;
                        } else if (std.mem.eql(u8, field, "NFKD_QC")) {
                            gop.value_ptr.* |= 2;
                        } else if (std.mem.eql(u8, field, "Full_Composition_Exclusion")) {
                            gop.value_ptr.* |= 4;
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Build the two-stage table:
    //   stage1: one u16 per 256-codepoint block -> offset into stage2
    //   stage2: deduplicated blocks of u3 property bitsets
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        // Codepoints without any listed property get an empty bitset.
        const props = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = props;
        block_len += 1;

        // Flush the block when full (or at the final codepoint).
        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // First CLI argument (after the program name) is the output path.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    // Write the raw-deflate-compressed binary data file. Data is in native
    // endianness; the consumer must match.
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
|
135
deps/zg/codegen/numeric.zig
vendored
Normal file
135
deps/zg/codegen/numeric.zig
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const mem = std.mem;
|
||||
|
||||
// Stage-2 blocks cover 256 codepoints each; entries are numeric-type
// bitsets (1 = Numeric, 2 = Digit, 4 = Decimal, per the parser below).
const block_size = 256;
const Block = [block_size]u8;
/// Map used to deduplicate identical 256-entry blocks while building the
/// two-level (stage1/stage2) index. Keys are whole blocks, compared
/// element-wise; values are a block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            // DeepRecursive hashes the array's elements, matching the
            // element-wise eql below.
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
|
||||
/// Generates a compressed two-stage lookup table of Unicode numeric types.
///
/// Reads `DerivedNumericType.txt`, assigns each listed code point a bit set
/// (Numeric = 1, Digit = 2, Decimal = 4), deduplicates 256-entry blocks of
/// those values, and writes stage-1 (block indices) and stage-2 (block data)
/// tables, raw-deflate-compressed, to the output path given as the first
/// command-line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit; no per-item frees needed.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> numeric-type bit set
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedNumericType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments.
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing comment, if any.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "CODE[..CODE] ; Type"; split on ';' and spaces.
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either a single value or an
                    // inclusive "FIRST..LAST" range.
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Numeric type, encoded as one bit per type.
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Numeric")) bit = 1;
                    if (mem.eql(u8, field, "Digit")) bit = 2;
                    if (mem.eql(u8, field, "Decimal")) bit = 4;

                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Build the two-stage table: stage1[cp / 256] gives the offset of a
    // deduplicated 256-byte block in stage2; stage2[offset + cp % 256] is
    // the value for cp.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const nt = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = nt;
        block_len += 1;

        // Flush only when the block is full (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;

        // Emit the block's data only the first time it is seen.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // NOTE(review): output is native-endian; presumably the table is
    // generated and consumed on the same host — confirm in the loader.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
|
135
deps/zg/codegen/props.zig
vendored
Normal file
135
deps/zg/codegen/props.zig
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const mem = std.mem;
|
||||
|
||||
const block_size = 256;
const Block = [block_size]u8;

/// Maps a deduplicated 256-byte block of per-code-point values to its
/// starting offset in the stage-2 table. The context hashes the array's
/// contents and compares blocks byte-by-byte.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            // DeepRecursive hashes the array's contents, not its address.
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
|
||||
|
||||
/// Generates a compressed two-stage lookup table of selected Unicode core
/// properties.
///
/// Reads `PropList.txt`, assigns each listed code point a bit set
/// (White_Space = 1, Hex_Digit = 2, Diacritic = 4), deduplicates 256-entry
/// blocks of those values, and writes stage-1 (block indices) and stage-2
/// (block data) tables, raw-deflate-compressed, to the output path given as
/// the first command-line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> property bit set
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process PropList.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments.
        if (line.len == 0 or line[0] == '#') continue;
        // Strip a trailing comment, if any.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "CODE[..CODE] ; Property"; split on ';' and spaces.
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single value or inclusive
                    // "FIRST..LAST" range.
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property, encoded as one bit per property;
                    // properties other than these three are ignored.
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "White_Space")) bit = 1;
                    if (mem.eql(u8, field, "Hex_Digit")) bit = 2;
                    if (mem.eql(u8, field, "Diacritic")) bit = 4;

                    if (bit != 0) {
                        // OR the bit into every code point of the range.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    // Build the two-stage table: stage1[cp / 256] gives the offset of a
    // deduplicated 256-byte block in stage2; stage2[offset + cp % 256] is
    // the value for cp.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        // Flush only when the block is full (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;

        // Emit the block's data only the first time it is seen.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // NOTE(review): output is native-endian; presumably the table is
    // generated and consumed on the same host — confirm in the loader.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
|
308
deps/zg/codegen/scripts.zig
vendored
Normal file
308
deps/zg/codegen/scripts.zig
vendored
Normal file
@@ -0,0 +1,308 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Script names as they appear in `Scripts.txt`, parsed via
/// `std.meta.stringToEnum`. `none` (value 0) marks code points with no
/// script assignment; the declaration order fixes the integer values
/// written into the generated data file.
const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
|
||||
|
||||
const block_size = 256;
const Block = [block_size]u8;

/// Maps a deduplicated 256-byte block of per-code-point values to its
/// starting offset in the stage-2 table. The context hashes the array's
/// contents and compares blocks byte-by-byte.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            // DeepRecursive hashes the array's contents, not its address.
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
|
||||
|
||||
/// Generates a compressed three-stage lookup table of Unicode script
/// assignments.
///
/// Reads `Scripts.txt`, records each code point's `Script` enum value,
/// then builds: stage3 — the deduplicated list of script values actually
/// seen; stage2 — deduplicated 256-entry blocks of stage3 indices; stage1 —
/// per-block offsets into stage2. All three tables are written
/// raw-deflate-compressed to the output path given as the first
/// command-line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // code point -> @intFromEnum(Script)
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process Scripts.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments.
        if (line.len == 0 or line[0] == '#') continue;

        // Strip a trailing comment, if any.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        // Fields are "CODE[..CODE] ; Script"; split on ';' and spaces.
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single value or inclusive
                    // "FIRST..LAST" range.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Script name; an unrecognized name means the Script
                    // enum is out of date with the data file.
                    const script = std.meta.stringToEnum(Script, field) orelse {
                        std.debug.print("Unknown script: {s}\n", .{field});
                        return error.UnknownScript;
                    };
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(script));
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    // Deduplicated script values; stage2 stores indices into this list.
    var stage3 = std.ArrayList(u8).init(allocator);
    defer stage3.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const script = flat_map.get(cp) orelse 0;

        // Linear scan is fine here: stage3 stays small (one entry per
        // distinct script value encountered).
        const stage3_idx = blk: {
            for (stage3.items, 0..) |script_i, j| {
                if (script == script_i) break :blk j;
            }
            try stage3.append(script);
            break :blk stage3.items.len - 1;
        };

        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;

        // Flush only when the block is full (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;

        // Emit the block's data only the first time it is seen.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // NOTE(review): output is native-endian; presumably the table is
    // generated and consumed on the same host — confirm in the loader.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
|
57
deps/zg/codegen/upper.zig
vendored
Normal file
57
deps/zg/codegen/upper.zig
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Generates the simple-uppercase-mapping table.
///
/// Reads `UnicodeData.txt` and, for every code point with a simple
/// uppercase mapping (field 12), writes a pair of native-endian `i24`
/// values: the code point and the signed delta `mapping - cp`. The stream
/// is terminated by a 24-bit zero and raw-deflate-compressed into the
/// output path given as the first command-line argument.
pub fn main() !void {
    // Arena: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),

                // Field 1 is the character Name. Bracketed names such as
                // "<control>" or "<CJK Ideograph, First>" mark control
                // characters and range sentinels; skip those lines.
                // Fix: this guard was `2 => if (line[0] == '<')`, which is
                // dead code — `line[0]` is always a hex digit of the code
                // point, and field 2 (General_Category) never starts
                // with '<'. The Name field is the one that can.
                1 => if (field.len != 0 and field[0] == '<') continue :lines,

                12 => {
                    // Simple uppercase mapping; empty means none.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    // Store the signed delta from cp.
                    try writer.writeInt(i24, mapping - cp, endian);
                },

                else => {},
            }
        }
    }

    // Terminator: code point 0 never has an uppercase mapping, so a zero
    // record unambiguously ends the stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}
|
Reference in New Issue
Block a user