init
I will never get tired of vendoring dependencies. ha ha. It is possible I am insane. I had to do a lot of pruning to get these not to be ridiculous (especially the unicode data, which had nearly 1 million lines of... stuff).
This commit is contained in:
66
deps/zg/src/CanonData.zig
vendored
Normal file
66
deps/zg/src/CanonData.zig
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
|
||||
allocator: mem.Allocator,
|
||||
nfc: std.AutoHashMap([2]u21, u21),
|
||||
nfd: [][]u21 = undefined,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Loads the canonical composition/decomposition tables from the
/// compressed `canon` data file embedded at build time.
/// Caller must release resources with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfc = std.AutoHashMap([2]u21, u21).init(allocator),
        // One slot per possible code point (0x0 through 0x10FFFF).
        .nfd = try allocator.alloc([]u21, 0x110000),
    };

    // Track how many per-code-point slices have been allocated so the
    // errdefer frees only slices that were actually created.
    var slices: usize = 0;
    errdefer {
        self.nfc.deinit();
        for (self.nfd[0..slices]) |slice| self.allocator.free(slice);
        self.allocator.free(self.nfd);
    }

    // Code points without a decomposition map to an empty slice.
    @memset(self.nfd, &.{});

    // Record format: u8 record length (0 terminates the stream), u24 code
    // point, then (len - 1) u24 decomposition code points.
    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfd[cp] = try allocator.alloc(u21, len - 1);
        slices += 1;
        for (0..len - 1) |i| {
            self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
        // A two-code-point decomposition also defines the primary composite
        // used for composition (the reverse NFC mapping).
        if (len == 3) {
            try self.nfc.put(self.nfd[cp][0..2].*, @intCast(cp));
        }
    }

    return self;
}
|
||||
|
||||
/// Frees the composition map and all decomposition slices.
pub fn deinit(self: *Self) void {
    self.nfc.deinit();
    for (self.nfd) |decomp| {
        self.allocator.free(decomp);
    }
    self.allocator.free(self.nfd);
}
|
||||
|
||||
/// Returns canonical decomposition for `cp`.
/// An empty slice means `cp` has no canonical decomposition.
pub fn toNfd(self: Self, cp: u21) []const u21 {
    return self.nfd[cp];
}

/// Returns the primary composite for the codepoints in `cps`, or null
/// when the pair does not canonically compose.
pub fn toNfc(self: Self, cps: [2]u21) ?u21 {
    return self.nfc.get(cps);
}
|
||||
202
deps/zg/src/CaseData.zig
vendored
Normal file
202
deps/zg/src/CaseData.zig
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
const unicode = std.unicode;
|
||||
|
||||
const CodePointIterator = @import("code_point").Iterator;
|
||||
|
||||
allocator: mem.Allocator,
|
||||
case_map: [][2]u21,
|
||||
prop_s1: []u16 = undefined,
|
||||
prop_s2: []u8 = undefined,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Loads the case mapping and case property tables from three compressed
/// data files embedded at build time (`upper`, `lower`, `case_prop`).
/// Caller must release resources with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        // One [upper, lower] pair per possible code point.
        .case_map = try allocator.alloc([2]u21, 0x110000),
    };
    errdefer allocator.free(self.case_map);

    // Default: every code point maps to itself for both cases.
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        self.case_map[cp] = .{ cp, cp };
    }

    // Uppercase
    // Record format: i24 code point (0 terminates), i24 signed delta to
    // the uppercase mapping.
    const upper_bytes = @embedFile("upper");
    var upper_fbs = std.io.fixedBufferStream(upper_bytes);
    var upper_decomp = decompressor(.raw, upper_fbs.reader());
    var upper_reader = upper_decomp.reader();

    while (true) {
        const cp = try upper_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try upper_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][0] = @intCast(cp + diff);
    }

    // Lowercase
    // Same record format as the uppercase table.
    const lower_bytes = @embedFile("lower");
    var lower_fbs = std.io.fixedBufferStream(lower_bytes);
    var lower_decomp = decompressor(.raw, lower_fbs.reader());
    var lower_reader = lower_decomp.reader();

    while (true) {
        const cp = try lower_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try lower_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][1] = @intCast(cp + diff);
    }

    // Case properties
    // Two-stage lookup table: u16 length + u16 entries (stage 1), then
    // u16 length + raw bytes (stage 2).
    const cp_bytes = @embedFile("case_prop");
    var cp_fbs = std.io.fixedBufferStream(cp_bytes);
    var cp_decomp = decompressor(.raw, cp_fbs.reader());
    var cp_reader = cp_decomp.reader();

    const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (0..stage_1_len) |i| self.prop_s1[i] = try cp_reader.readInt(u16, endian);

    const stage_2_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    // NOTE(review): readAll can return fewer bytes than requested on a
    // truncated stream; the short-read count is discarded here — confirm
    // the embedded data is always complete.
    _ = try cp_reader.readAll(self.prop_s2);

    return self;
}
|
||||
|
||||
/// Frees the case map and both property-table stages.
pub fn deinit(self: *const Self) void {
    const allocator = self.allocator;
    allocator.free(self.case_map);
    allocator.free(self.prop_s1);
    allocator.free(self.prop_s2);
}
|
||||
|
||||
/// Returns true if `cp` is either upper, lower, or title case.
pub fn isCased(self: Self, cp: u21) bool {
    // Two-stage table lookup; bit 2 flags "cased".
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

/// Returns true if `cp` is uppercase.
pub fn isUpper(self: Self, cp: u21) bool {
    // Two-stage table lookup; bit 1 flags uppercase.
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}
|
||||
|
||||
/// Returns true if `str` is all uppercase.
/// Uncased code points (digits, punctuation, ...) are ignored.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var cp_iter = CodePointIterator{ .bytes = str };

    while (cp_iter.next()) |cp| {
        const code = cp.code;
        if (!self.isCased(code)) continue;
        if (!self.isUpper(code)) return false;
    }

    return true;
}
|
||||
|
||||
// Exercises init/deinit plus the cased/uppercase property checks.
test "isUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isUpperStr("hello, world 2112!"));
    try testing.expect(!cd.isUpperStr("Hello, World 2112!"));
}
|
||||
|
||||
/// Returns uppercase mapping for `cp`.
/// Code points without an uppercase mapping map to themselves.
pub fn toUpper(self: Self, cp: u21) u21 {
    return self.case_map[cp][0];
}

/// Returns a new string with all letters in uppercase.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var bytes = std.ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var iter = CodePointIterator{ .bytes = str };
    // UTF-8 encodes to at most 4 bytes per code point.
    var buf: [4]u8 = undefined;

    while (iter.next()) |cp| {
        const len = try unicode.utf8Encode(self.toUpper(cp.code), &buf);
        try bytes.appendSlice(buf[0..len]);
    }

    return try bytes.toOwnedSlice();
}

// Exercises the full upper-casing pipeline including UTF-8 re-encoding.
test "toUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(uppered);
    try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered);
}
|
||||
|
||||
/// Returns true if `cp` is lowercase.
pub fn isLower(self: Self, cp: u21) bool {
    // Two-stage table lookup; bit 0 flags lowercase.
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}

/// Returns true if `str` is all lowercase.
/// Uncased code points (digits, punctuation, ...) are ignored.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    return while (iter.next()) |cp| {
        if (self.isCased(cp.code) and !self.isLower(cp.code)) break false;
    } else true;
}

// Exercises init/deinit plus the cased/lowercase property checks.
test "isLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    try testing.expect(cd.isLowerStr("hello, world 2112!"));
    try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isLowerStr("Hello, World 2112!"));
}
|
||||
|
||||
/// Returns lowercase mapping for `cp`.
/// Code points without a lowercase mapping map to themselves.
pub fn toLower(self: Self, cp: u21) u21 {
    return self.case_map[cp][1];
}

/// Returns a new string with all letters in lowercase.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var bytes = std.ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var iter = CodePointIterator{ .bytes = str };
    // UTF-8 encodes to at most 4 bytes per code point.
    var buf: [4]u8 = undefined;

    while (iter.next()) |cp| {
        const len = try unicode.utf8Encode(self.toLower(cp.code), &buf);
        try bytes.appendSlice(buf[0..len]);
    }

    return try bytes.toOwnedSlice();
}

// Exercises the full lower-casing pipeline including UTF-8 re-encoding.
test "toLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(lowered);
    try testing.expectEqualStrings("hello, world 2112!", lowered);
}
|
||||
189
deps/zg/src/CaseFold.zig
vendored
Normal file
189
deps/zg/src/CaseFold.zig
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
const ascii = @import("ascii");
|
||||
pub const FoldData = @import("FoldData");
|
||||
const Normalize = @import("Normalize");
|
||||
|
||||
fold_data: *const FoldData,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Produces the case folded code points for `cps`. Caller must free returned
/// slice with `allocator`.
pub fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]const u21 {
    var result = std.ArrayList(u21).init(allocator);
    defer result.deinit();
    // A single code point folds to at most three code points.
    var fold_buf: [3]u21 = undefined;

    for (cps) |cp| {
        const folded = self.fold_data.caseFold(cp, &fold_buf);

        if (folded.len != 0) {
            try result.appendSlice(folded);
        } else {
            // No fold mapping: the code point folds to itself.
            try result.append(cp);
        }
    }

    return try result.toOwnedSlice();
}
|
||||
|
||||
/// True if case folding any code point in `cps` would change it.
fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    for (cps) |cp| {
        if (self.fold_data.changesWhenCaseFolded(cp)) return true;
    }
    return false;
}
|
||||
|
||||
/// Caseless compare `a` and `b` by decomposing to NFKD. This is the most
/// comprehensive comparison possible, but slower than `canonCaselessMatch`.
pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    // Pure-ASCII inputs can be compared directly without normalization.
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    // Each string is reduced to NFKD(CF(NFKD(CF(NFD(x))))) and the
    // resulting code point sequences are compared.

    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);

    // Only allocate a case-folded copy when folding would actually change
    // the string; otherwise alias nfd_a and skip the free.
    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);

    const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a);
    defer allocator.free(nfkd_cf_nfd_a);
    const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
    defer allocator.free(cf_nfkd_cf_nfd_a);
    const nfkd_cf_nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);

    // Process b (same pipeline as a)
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);

    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);

    const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b);
    defer allocator.free(nfkd_cf_nfd_b);
    const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);
    defer allocator.free(cf_nfkd_cf_nfd_b);
    const nfkd_cf_nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);

    return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
}
|
||||
|
||||
// Verifies the compatibility (NFKD) caseless match: ASCII fast path,
// canonically-equivalent strings, and compatibility-equivalent strings.
test "compatCaselessMatch" {
    const allocator = testing.allocator;

    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };

    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));

    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b));

    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
}
|
||||
|
||||
/// Performs canonical caseless string matching by decomposing to NFD. This is
/// faster than `compatCaselessMatch`, but less comprehensive.
pub fn canonCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    // Pure-ASCII inputs can be compared directly without normalization.
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    // Each string is reduced to NFD(CF(NFD(x))) and the resulting code
    // point sequences are compared.

    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);

    // Only allocate a case-folded copy when folding would change the
    // string; otherwise alias nfd_a and skip the free.
    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);

    // If folding changed nothing, cf_nfd_a is already in NFD and the
    // second normalization pass can be skipped entirely.
    var need_free_nfd_cf_nfd_a = false;
    var nfd_cf_nfd_a = cf_nfd_a;
    if (!need_free_cf_nfd_a) {
        nfd_cf_nfd_a = try normalizer.nfdCodePoints(allocator, cf_nfd_a);
        need_free_nfd_cf_nfd_a = true;
    }
    defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);

    // Process b (same pipeline as a)
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);

    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);

    var need_free_nfd_cf_nfd_b = false;
    var nfd_cf_nfd_b = cf_nfd_b;
    if (!need_free_cf_nfd_b) {
        nfd_cf_nfd_b = try normalizer.nfdCodePoints(allocator, cf_nfd_b);
        need_free_nfd_cf_nfd_b = true;
    }
    defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);

    return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b);
}
|
||||
|
||||
// Verifies the canonical (NFD) caseless match: ASCII fast path, a
// compatibility-only equivalence that must NOT match, and a canonical
// equivalence that must match.
test "canonCaselessMatch" {
    const allocator = testing.allocator;

    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };

    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    try testing.expect(try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));

    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(!try caser.canonCaselessMatch(allocator, &n, a, b));

    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.canonCaselessMatch(allocator, &n, a, c));
}
|
||||
49
deps/zg/src/CombiningData.zig
vendored
Normal file
49
deps/zg/src/CombiningData.zig
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
|
||||
allocator: mem.Allocator,
|
||||
s1: []u16 = undefined,
|
||||
s2: []u8 = undefined,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Loads the canonical combining class two-stage lookup table from the
/// compressed `ccc` data file embedded at build time.
/// Caller must release resources with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("ccc");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    // Stage 1: u16 length followed by that many u16 entries.
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    // Stage 2: u16 length followed by raw bytes.
    // NOTE(review): readAll may return fewer bytes than requested on a
    // truncated stream; the short-read count is discarded here — confirm
    // the embedded data is always complete.
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.s2);
    _ = try reader.readAll(self.s2);

    return self;
}
|
||||
|
||||
/// Frees both stages of the lookup table.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns the canonical combining class for a code point.
pub fn ccc(self: Self, cp: u21) u8 {
    // Two-stage table: high bits select a stage-1 block, low byte indexes
    // into stage 2.
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
}

/// True if `cp` is a starter code point, not a combining character.
/// (Starters have canonical combining class 0.)
pub fn isStarter(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0;
}
|
||||
50
deps/zg/src/CompatData.zig
vendored
Normal file
50
deps/zg/src/CompatData.zig
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
|
||||
allocator: mem.Allocator,
|
||||
nfkd: [][]u21 = undefined,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Loads the compatibility decomposition table from the compressed `compat`
/// data file embedded at build time.
/// Caller must release resources with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        // One slot per possible code point (0x0 through 0x10FFFF).
        .nfkd = try allocator.alloc([]u21, 0x110000),
    };
    // deinit skips zero-length slices, so it is safe to invoke once the
    // table has been allocated (and memset below runs before any try).
    errdefer self.deinit();

    // Code points without a decomposition map to an empty slice.
    @memset(self.nfkd, &.{});

    // Record format: u8 record length (0 terminates the stream), u24 code
    // point, then (len - 1) u24 decomposition code points.
    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfkd[cp] = try allocator.alloc(u21, len - 1);
        for (0..len - 1) |i| {
            self.nfkd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
    }

    return self;
}
|
||||
|
||||
/// Frees all decomposition slices and the table itself.
pub fn deinit(self: *const Self) void {
    for (self.nfkd) |slice| {
        // Unfilled entries are the empty-slice sentinel; nothing to free.
        if (slice.len != 0) self.allocator.free(slice);
    }
    self.allocator.free(self.nfkd);
}

/// Returns compatibility decomposition for `cp`.
/// An empty slice means `cp` has no compatibility decomposition.
/// NOTE(review): returns a mutable slice into internal data, unlike
/// CanonData.toNfd which returns []const u21 — confirm callers never mutate.
pub fn toNfkd(self: Self, cp: u21) []u21 {
    return self.nfkd[cp];
}
|
||||
355
deps/zg/src/DisplayWidth.zig
vendored
Normal file
355
deps/zg/src/DisplayWidth.zig
vendored
Normal file
@@ -0,0 +1,355 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const ArrayList = std.ArrayList;
|
||||
const mem = std.mem;
|
||||
const simd = std.simd;
|
||||
const testing = std.testing;
|
||||
|
||||
const ascii = @import("ascii");
|
||||
const CodePointIterator = @import("code_point").Iterator;
|
||||
const GraphemeIterator = @import("grapheme").Iterator;
|
||||
pub const DisplayWidthData = @import("DisplayWidthData");
|
||||
|
||||
data: *const DisplayWidthData,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// strWidth returns the total display width of `str` as the number of cells
/// required in a fixed-pitch font (i.e. a terminal screen).
pub fn strWidth(self: Self, str: []const u8) usize {
    // Signed running total: control characters can have negative width
    // (e.g. backspace); the result is clamped at zero.
    var total: isize = 0;

    // ASCII fast path
    if (ascii.isAsciiOnly(str)) {
        for (str) |b| total += self.data.codePointWidth(b);
        return @intCast(@max(0, total));
    }

    // Non-ASCII: width is computed per grapheme cluster.
    var giter = GraphemeIterator.init(str, &self.data.g_data);

    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
        var gc_total: isize = 0;

        while (cp_iter.next()) |cp| {
            var w = self.data.codePointWidth(cp.code);

            if (w != 0) {
                // Handle text emoji sequence.
                // NOTE(review): this peek consumes the following code point
                // of the cluster — intentional, since the loop breaks below.
                if (cp_iter.next()) |ncp| {
                    // emoji text sequence (VS15 forces narrow, VS16 wide).
                    if (ncp.code == 0xFE0E) w = 1;
                    if (ncp.code == 0xFE0F) w = 2;
                }

                // Only adding width of first non-zero-width code point.
                if (gc_total == 0) {
                    gc_total = w;
                    break;
                }
            }
        }

        total += gc_total;
    }

    return @intCast(@max(0, total));
}
|
||||
|
||||
// Exercises strWidth against ASCII, combining marks, emoji sequences,
// variation selectors, control characters, and CJK text.
test "strWidth" {
    const data = try DisplayWidthData.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector
    try testing.expectEqual(@as(usize, 0), self.strWidth("A\x08")); // Backspace
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA")); // DEL
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA\x08\x08")); // never less than 0

    // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
    const empty = "";
    try testing.expectEqual(@as(usize, 0), self.strWidth(empty));
    const with_null = "hello\x00world";
    try testing.expectEqual(@as(usize, 10), self.strWidth(with_null));
    const hello_jp = "コンニチハ, セカイ!";
    try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp));
    const control = "\x1b[0m";
    try testing.expectEqual(@as(usize, 3), self.strWidth(control));
    const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
    try testing.expectEqual(@as(usize, 3), self.strWidth(balinese));

    // These commented out tests require a new specification for complex scripts.
    // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    // const jamo = "\u{1100}\u{1160}";
    // try testing.expectEqual(@as(usize, 3), strWidth(jamo));
    // const devengari = "\u{0915}\u{094D}\u{0937}\u{093F}";
    // try testing.expectEqual(@as(usize, 3), strWidth(devengari));
    // const tamal = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}";
    // try testing.expectEqual(@as(usize, 5), strWidth(tamal));
    // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}";
    // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
    // The following passes but as a mere coincidence.
    const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
    try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2));

    // From Rust https://github.com/jameslanska/unicode-display-width
    try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻🚀⏰💃🏼🔦👍🏻"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("🦀"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👨👩👧👧"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👩🔬"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("sane text"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
    try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}"));
}
|
||||
|
||||
/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
/// If the length of `str` and `total_width` have different parity, the right side of `str` will
/// receive one additional pad. This makes sure the returned string fills the requested width.
/// Caller must free returned bytes with `allocator`.
pub fn center(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Exact fit: no padding needed, return a copy.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    // Width of one side margin (in cells).
    const margin_width = @divFloor((total_width - str_width), 2);
    if (pad_width > margin_width) return error.PadTooLong;
    // Parity mismatch: the right side gets one extra pad.
    const extra_pad: usize = if (total_width % 2 != str_width % 2) 1 else 0;
    const pads = @divFloor(margin_width, pad_width) * 2 + extra_pad;

    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;

    // Left margin.
    while (pads_index < pads / 2) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;

    // Right margin (plus the extra pad on parity mismatch).
    pads_index = 0;
    while (pads_index < pads / 2 + extra_pad) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    return result;
}
|
||||
|
||||
// Exercises center across parity combinations, exact fit, empty input,
// zero width, and the too-long error path.
test "center" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Input and width both have odd length
    var centered = try self.center(allocator, "abc", 9, "*");
    try testing.expectEqualSlices(u8, "***abc***", centered);

    // Input and width both have even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "---w😊w---", centered);

    // Input has even length, width has odd length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "1234", 9, "-");
    try testing.expectEqualSlices(u8, "--1234---", centered);

    // Input has odd length, width has even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 8, "-");
    try testing.expectEqualSlices(u8, "--123---", centered);

    // Input is the same length as the width
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 3, "-");
    try testing.expectEqualSlices(u8, "123", centered);

    // Input is empty
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 3, "-");
    try testing.expectEqualSlices(u8, "---", centered);

    // Input is empty and width is zero
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 0, "-");
    try testing.expectEqualSlices(u8, "", centered);

    // Input is longer than the width, which is an error
    testing.allocator.free(centered);
    try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-"));
}
|
||||
|
||||
/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the left side. Caller must free returned bytes with `allocator`.
pub fn padLeft(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    // Cells left over for padding.
    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pad_count = @divFloor(margin_width, pad_width);

    const result = try allocator.alloc(u8, pad_count * pad.len + str.len);
    var offset: usize = 0;

    // Padding first, then the string.
    for (0..pad_count) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    @memcpy(result[offset..][0..str.len], str);

    return result;
}
|
||||
|
||||
// Exercises padLeft with single-cell and double-cell (emoji) content.
test "padLeft" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    var right_aligned = try self.padLeft(allocator, "abc", 9, "*");
    defer testing.allocator.free(right_aligned);
    try testing.expectEqualSlices(u8, "******abc", right_aligned);

    testing.allocator.free(right_aligned);
    right_aligned = try self.padLeft(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
}
|
||||
|
||||
/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the right side. Caller must free returned bytes with `allocator`.
pub fn padRight(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    // Cells left over for padding.
    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pad_count = @divFloor(margin_width, pad_width);

    const result = try allocator.alloc(u8, pad_count * pad.len + str.len);

    // String first, then the padding.
    @memcpy(result[0..str.len], str);
    var offset: usize = str.len;

    for (0..pad_count) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    return result;
}
|
||||
|
||||
test "padRight" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // One defer per allocation: the original freed the first result manually
    // and re-assigned the variable under a single defer, which would double
    // free the first buffer if the second padRight call returned an error.
    const left_aligned = try self.padRight(allocator, "abc", 9, "*");
    defer allocator.free(left_aligned);
    try testing.expectEqualSlices(u8, "abc******", left_aligned);

    // The emoji occupies two display cells, so only six '-' pads are needed.
    const wide_aligned = try self.padRight(allocator, "w😊w", 10, "-");
    defer allocator.free(wide_aligned);
    try testing.expectEqualSlices(u8, "w😊w------", wide_aligned);
}
|
||||
|
||||
/// Wraps a string approximately at the given number of columns per line.
/// `threshold` defines how far the last column of the last word can be
/// from the edge. Caller must free returned bytes with `allocator`.
pub fn wrap(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += self.strWidth(word) + 1;

            // Break when the line is full or the remaining slack is within
            // threshold. Short-circuit `or` keeps the subtraction from
            // underflowing when line_width > columns.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Trim the trailing separator: a '\n' if the last word triggered a
    // break, then the ' ' appended after the last word. The original popped
    // twice unconditionally, which ate the final byte of the last word when
    // no break was emitted and panicked on empty input.
    if (result.items.len > 0 and result.items[result.items.len - 1] == '\n') _ = result.pop();
    if (result.items.len > 0 and result.items[result.items.len - 1] == ' ') _ = result.pop();

    return try result.toOwnedSlice();
}
|
||||
|
||||
test "wrap" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const dw = Self{ .data = &data };

    // Source newlines are treated as word separators; breaks are re-inserted
    // once a line comes within `threshold` of `columns`.
    const input = "The quick brown fox\r\njumped over the lazy dog!";
    const expected = "The quick \nbrown fox \njumped \nover the \nlazy dog!";

    const actual = try dw.wrap(allocator, input, 10, 3);
    defer allocator.free(actual);

    try testing.expectEqualStrings(expected, actual);
}
|
||||
98
deps/zg/src/FoldData.zig
vendored
Normal file
98
deps/zg/src/FoldData.zig
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
// First code point with no case-fold data; anything >= cutoff maps to itself.
cutoff: u21 = undefined,
// Inclusive range + list of code points excluded from changesWhenCaseFolded.
cwcf_exceptions_min: u21 = undefined,
cwcf_exceptions_max: u21 = undefined,
cwcf_exceptions: []u21 = undefined,
// Offset into stage3 where multi-code-point mappings begin.
multiple_start: u21 = undefined,
// Three-stage lookup table: stage1[cp >> 8] selects a stage2 page,
// stage2 yields a stage3 index, stage3 holds signed deltas or multi-cp data.
stage1: []u8 = undefined,
stage2: []u8 = undefined,
stage3: []i24 = undefined,

const Self = @This();

/// Decompresses and deserializes the embedded case-fold tables.
/// NOTE(review): data is read with native endianness, so the embedded blob
/// is presumably generated for the target's byte order — confirm against
/// the generator.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("fold");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };
    self.cutoff = @intCast(try reader.readInt(u24, endian));
    self.multiple_start = @intCast(try reader.readInt(u24, endian));

    var len = try reader.readInt(u16, endian);
    self.stage1 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage1);
    for (0..len) |i| self.stage1[i] = try reader.readInt(u8, endian);

    len = try reader.readInt(u16, endian);
    self.stage2 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage2);
    for (0..len) |i| self.stage2[i] = try reader.readInt(u8, endian);

    len = try reader.readInt(u16, endian);
    self.stage3 = try allocator.alloc(i24, len);
    errdefer allocator.free(self.stage3);
    for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian);

    self.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian));
    self.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian));
    len = try reader.readInt(u16, endian);
    self.cwcf_exceptions = try allocator.alloc(u21, len);
    // Fix: without this errdefer, a failed read below leaked the slice.
    errdefer allocator.free(self.cwcf_exceptions);
    for (0..len) |i| self.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.stage1);
    self.allocator.free(self.stage2);
    self.allocator.free(self.stage3);
    // Fix: cwcf_exceptions is allocated in init and was previously leaked.
    self.allocator.free(self.cwcf_exceptions);
}

/// Returns the case fold for `cp`. Multi-code-point folds are written into
/// `buf` (which must hold at least 3 elements); an empty slice means `cp`
/// folds to itself.
pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 {
    if (cp >= self.cutoff) return &.{};

    const stage1_val = self.stage1[cp >> 8];
    if (stage1_val == 0) return &.{};

    const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF);
    const stage3_index = self.stage2[stage2_index];

    // High bit set: the low 7 bits index a zero-terminated group of up to
    // three code points stored past multiple_start.
    if (stage3_index & 0x80 != 0) {
        const real_index = @as(usize, self.multiple_start) + (stage3_index ^ 0x80) * 3;
        const mapping = mem.sliceTo(self.stage3[real_index..][0..3], 0);
        for (mapping, 0..) |c, i| buf[i] = @intCast(c);

        return buf[0..mapping.len];
    }

    // Otherwise stage3 holds a signed delta from cp to its fold.
    const offset = self.stage3[stage3_index];
    if (offset == 0) return &.{};

    buf[0] = @intCast(@as(i32, cp) + offset);

    return buf[0..1];
}

/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
pub fn changesWhenCaseFolded(self: Self, cp: u21) bool {
    var buf: [3]u21 = undefined;
    const has_mapping = self.caseFold(cp, &buf).len != 0;
    return has_mapping and !self.isCwcfException(cp);
}

// True when `cp` is listed as an exception to "changes when case folded".
fn isCwcfException(self: Self, cp: u21) bool {
    return cp >= self.cwcf_exceptions_min and
        cp <= self.cwcf_exceptions_max and
        std.mem.indexOfScalar(u21, self.cwcf_exceptions, cp) != null;
}
|
||||
171
deps/zg/src/GenCatData.zig
vendored
Normal file
171
deps/zg/src/GenCatData.zig
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

/// General Category
pub const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};

allocator: mem.Allocator,
// Three-stage lookup table (see `gc`): s1[cp >> 8] selects a 256-entry page
// in s2; s2 yields an index into s3; s3 holds the Gc enum value.
s1: []u16 = undefined,
s2: []u5 = undefined,
s3: []u5 = undefined,

const Self = @This();

/// Decompresses and deserializes the embedded General Category tables.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gencat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u5, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    // NOTE(review): s3's length is serialized as a single u8, unlike the u16
    // used for s1/s2 (s3 holds at most 31 distinct category values, so it
    // fits) — confirm against the table generator.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u5, s3_len);
    errdefer allocator.free(self.s3);
    for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}

/// Lookup the General Category for `cp`.
pub fn gc(self: Self, cp: u21) Gc {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
}

/// True if `cp` has an C general category.
pub fn isControl(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Cc,
        .Cf,
        .Cn,
        .Co,
        .Cs,
        => true,
        else => false,
    };
}

/// True if `cp` has an L general category.
pub fn isLetter(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Ll,
        .Lm,
        .Lo,
        .Lu,
        .Lt,
        => true,
        else => false,
    };
}

/// True if `cp` has an M general category.
pub fn isMark(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Mc,
        .Me,
        .Mn,
        => true,
        else => false,
    };
}

/// True if `cp` has an N general category.
pub fn isNumber(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Nd,
        .Nl,
        .No,
        => true,
        else => false,
    };
}

/// True if `cp` has an P general category.
pub fn isPunctuation(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Pc,
        .Pd,
        .Pe,
        .Pf,
        .Pi,
        .Po,
        .Ps,
        => true,
        else => false,
    };
}

/// True if `cp` has an S general category.
pub fn isSymbol(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Sc,
        .Sk,
        .Sm,
        .So,
        => true,
        else => false,
    };
}

/// True if `cp` has an Z general category.
pub fn isSeparator(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Zl,
        .Zp,
        .Zs,
        => true,
        else => false,
    };
}
|
||||
88
deps/zg/src/GraphemeData.zig
vendored
Normal file
88
deps/zg/src/GraphemeData.zig
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

/// Indic syllable type.
pub const Indic = enum {
    none,

    Consonant,
    Extend,
    Linker,
};

/// Grapheme break property.
pub const Gbp = enum {
    none,
    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};

allocator: mem.Allocator,
// Three-stage lookup table. Each s3 byte packs three properties:
// bits 4-7 = Gbp, bits 1-3 = Indic, bit 0 = emoji flag (see accessors below).
s1: []u16 = undefined,
s2: []u16 = undefined,
s3: []u8 = undefined,

const Self = @This();

/// Decompresses and deserializes the embedded grapheme-break tables.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u16, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian);

    // s3 is raw bytes, so it can be filled with a single bulk read.
    const s3_len: u16 = try reader.readInt(u16, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    _ = try reader.readAll(self.s3);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}

/// Lookup the grapheme break property for a code point.
pub fn gbp(self: Self, cp: u21) Gbp {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
}

/// Lookup the indic syllable type for a code point.
pub fn indic(self: Self, cp: u21) Indic {
    return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
}

/// True if the emoji-property bit is set for `cp`.
/// (Original doc comment was a copy-paste of `indic`'s.)
pub fn isEmoji(self: Self, cp: u21) bool {
    return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
}
|
||||
53
deps/zg/src/HangulData.zig
vendored
Normal file
53
deps/zg/src/HangulData.zig
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

/// Hangul syllable type of a code point; `none` for non-Hangul.
pub const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};

allocator: mem.Allocator,
// Two-stage lookup table: s1[cp >> 8] selects a page; s2 holds the
// Syllable enum value (fits in 3 bits).
s1: []u16 = undefined,
s2: []u3 = undefined,

const Self = @This();

/// Decompresses and deserializes the embedded Hangul syllable-type tables.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("hangul");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u3, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns the Hangul syllable type for `cp`.
pub fn syllable(self: Self, cp: u21) Syllable {
    return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]);
}
|
||||
37
deps/zg/src/NormData.zig
vendored
Normal file
37
deps/zg/src/NormData.zig
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
const std = @import("std");
const mem = std.mem;

const CanonData = @import("CanonData");
const CccData = @import("CombiningData");
const CompatData = @import("CompatData");
const FoldData = @import("FoldData");
const HangulData = @import("HangulData");
const NormPropsData = @import("NormPropsData");

// Aggregates all Unicode data tables needed for normalization.
canon_data: CanonData = undefined,
ccc_data: CccData = undefined,
compat_data: CompatData = undefined,
hangul_data: HangulData = undefined,
normp_data: NormPropsData = undefined,

const Self = @This();

/// Initializes all member tables in place (out-pointer style, unlike the
/// value-returning inits of the member types). On error, every table
/// initialized so far is torn down via the errdefer chain; the last init
/// needs no errdefer since nothing can fail after it.
pub fn init(self: *Self, allocator: std.mem.Allocator) !void {
    self.canon_data = try CanonData.init(allocator);
    errdefer self.canon_data.deinit();
    self.ccc_data = try CccData.init(allocator);
    errdefer self.ccc_data.deinit();
    self.compat_data = try CompatData.init(allocator);
    errdefer self.compat_data.deinit();
    self.hangul_data = try HangulData.init(allocator);
    errdefer self.hangul_data.deinit();
    self.normp_data = try NormPropsData.init(allocator);
}

/// Frees all member tables.
pub fn deinit(self: *Self) void {
    self.canon_data.deinit();
    self.ccc_data.deinit();
    self.compat_data.deinit();
    self.hangul_data.deinit();
    self.normp_data.deinit();
}
|
||||
54
deps/zg/src/NormPropsData.zig
vendored
Normal file
54
deps/zg/src/NormPropsData.zig
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

allocator: mem.Allocator,
// Two-stage lookup table: s1[cp >> 8] selects a page; each s2 nibble packs
// per-code-point flags (bit 0 = not-NFD, bit 1 = not-NFKD, bit 2 = FCX).
s1: []u16 = undefined,
s2: []u4 = undefined,

const Self = @This();

/// Decompresses and deserializes the embedded normalization-property tables.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("normp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u4, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns true if `cp` is already in NFD form.
pub fn isNfd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0;
}

/// Returns true if `cp` is already in NFKD form.
pub fn isNfkd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0;
}

/// Returns true if `cp` is not allowed in any normalized form.
pub fn isFcx(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
|
||||
622
deps/zg/src/Normalize.zig
vendored
Normal file
622
deps/zg/src/Normalize.zig
vendored
Normal file
@@ -0,0 +1,622 @@
|
||||
//! Normalizer contains functions and methods that implement
|
||||
//! Unicode Normalization. You can normalize strings into NFC,
|
||||
//! NFKC, NFD, and NFKD normalization forms.
|
||||
|
||||
const std = @import("std");
|
||||
const debug = std.debug;
|
||||
const assert = debug.assert;
|
||||
const fmt = std.fmt;
|
||||
const heap = std.heap;
|
||||
const mem = std.mem;
|
||||
const simd = std.simd;
|
||||
const testing = std.testing;
|
||||
const unicode = std.unicode;
|
||||
|
||||
const ascii = @import("ascii");
|
||||
const CodePointIterator = @import("code_point").Iterator;
|
||||
pub const NormData = @import("NormData");
|
||||
|
||||
norm_data: *const NormData,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
// Hangul syllable arithmetic constants (syllable base, jamo bases and counts).
const SBase: u21 = 0xAC00;
const LBase: u21 = 0x1100;
const VBase: u21 = 0x1161;
const TBase: u21 = 0x11A7;
const LCount: u21 = 19;
const VCount: u21 = 21;
const TCount: u21 = 28;
const NCount: u21 = 588; // VCount * TCount
const SCount: u21 = 11172; // LCount * NCount

// Arithmetically decomposes a precomposed Hangul syllable (LV or LVT) into
// its leading/vowel(/trailing) jamo, written into `buf` (needs >= 3 slots).
// Returns null when `cp` is not a precomposed syllable.
fn decomposeHangul(self: Self, cp: u21, buf: []u21) ?Decomp {
    const kind = self.norm_data.hangul_data.syllable(cp);
    if (kind != .LV and kind != .LVT) return null;

    const SIndex: u21 = cp - SBase;
    const LIndex: u21 = SIndex / NCount;
    const VIndex: u21 = (SIndex % NCount) / TCount;
    const TIndex: u21 = SIndex % TCount;
    const LPart: u21 = LBase + LIndex;
    const VPart: u21 = VBase + VIndex;

    var dc = Decomp{ .form = .nfd };
    buf[0] = LPart;
    buf[1] = VPart;

    // TIndex == 0 means an LV syllable: no trailing consonant part.
    if (TIndex == 0) {
        dc.cps = buf[0..2];
        return dc;
    }

    // TPart
    buf[2] = TBase + TIndex;
    dc.cps = buf[0..3];
    return dc;
}
|
||||
|
||||
// Composes an LV syllable with a trailing consonant jamo `t` into an LVT
// syllable. Asserts `t` is in the trailing-jamo range.
fn composeHangulCanon(lv: u21, t: u21) u21 {
    assert(0x11A8 <= t and t <= 0x11C2);
    return lv + (t - TBase);
}

// Arithmetically composes leading jamo `l`, vowel jamo `v`, and optional
// trailing jamo `t` (0 for none) into a precomposed syllable. Asserts each
// jamo is in its valid range.
fn composeHangulFull(l: u21, v: u21, t: u21) u21 {
    assert(0x1100 <= l and l <= 0x1112);
    assert(0x1161 <= v and v <= 0x1175);
    const LIndex = l - LBase;
    const VIndex = v - VBase;
    const LVIndex = LIndex * NCount + VIndex * TCount;

    if (t == 0) return SBase + LVIndex;

    assert(0x11A8 <= t and t <= 0x11C2);
    const TIndex = t - TBase;

    return SBase + LVIndex + TIndex;
}
|
||||
|
||||
// Normalization form of a (partial) result; `same` means "no change".
const Form = enum {
    nfc,
    nfd,
    nfkc,
    nfkd,
    same,
};

// A decomposition result: the form it satisfies plus its code points.
// `form == .same` with empty `cps` means the input maps to itself.
const Decomp = struct {
    form: Form = .same,
    cps: []const u21 = &.{},
};

// `mapping` retrieves the decomposition mapping for a code point as per the UCD.
fn mapping(self: Self, cp: u21, form: Form) Decomp {
    var dc = Decomp{};

    switch (form) {
        .nfd => {
            dc.cps = self.norm_data.canon_data.toNfd(cp);
            if (dc.cps.len != 0) dc.form = .nfd;
        },

        .nfkd => {
            // Prefer the compatibility mapping; fall back to the canonical
            // one (a canonical decomposition is also a valid NFKD step).
            dc.cps = self.norm_data.compat_data.toNfkd(cp);
            if (dc.cps.len != 0) {
                dc.form = .nfkd;
            } else {
                dc.cps = self.norm_data.canon_data.toNfd(cp);
                if (dc.cps.len != 0) dc.form = .nfkd;
            }
        },

        else => @panic("Normalizer.mapping only accepts form .nfd or .nfkd."),
    }

    return dc;
}
|
||||
|
||||
// `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`.
// The full decomposition is written into `buf`; the returned Decomp's `cps`
// slices into it. Returns `.same` (empty cps) when `cp` needs no change.
fn decompose(
    self: Self,
    cp: u21,
    form: Form,
    buf: []u21,
) Decomp {
    // ASCII
    if (cp < 128) return .{};

    // NFD / NFKD quick checks.
    switch (form) {
        .nfd => if (self.norm_data.normp_data.isNfd(cp)) return .{},
        .nfkd => if (self.norm_data.normp_data.isNfkd(cp)) return .{},
        else => @panic("Normalizer.decompose only accepts form .nfd or .nfkd."),
    }

    // Hangul precomposed syllable full decomposition.
    if (self.decomposeHangul(cp, buf)) |dc| return dc;

    // Full decomposition.
    var dc = Decomp{ .form = form };

    var result_index: usize = 0;
    var work_index: usize = 1;

    // Start work with argument code point.
    // 18 is the work-queue capacity (1 + 17 slots for expansion).
    var work = [_]u21{cp} ++ [_]u21{0} ** 17;

    // Iteratively expand the work queue until every code point is fully
    // decomposed (i.e. maps to itself).
    while (work_index > 0) {
        // Look at previous code point in work queue.
        work_index -= 1;
        const next = work[work_index];
        const m = self.mapping(next, form);

        // No more of decompositions for this code point.
        if (m.form == .same) {
            buf[result_index] = next;
            result_index += 1;
            continue;
        }

        // Work backwards through decomposition.
        // `i` starts at 1 because m_last is 1 past the last code point.
        var i: usize = 1;
        while (i <= m.cps.len) : ({
            i += 1;
            work_index += 1;
        }) {
            work[work_index] = m.cps[m.cps.len - i];
        }
    }

    dc.cps = buf[0..result_index];

    return dc;
}
|
||||
|
||||
// Exercises canonical, compatibility, Hangul, and no-op decomposition paths.
test "decompose" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    var n = Self{ .norm_data = &data };

    var buf: [18]u21 = undefined;

    var dc = n.decompose('é', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]);

    dc = n.decompose('\u{1e0a}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);

    dc = n.decompose('\u{1e0a}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);

    // No canonical decomposition exists for this compatibility character.
    dc = n.decompose('\u{3189}', .nfd, &buf);
    try testing.expect(dc.form == .same);
    try testing.expect(dc.cps.len == 0);

    dc = n.decompose('\u{3189}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]);

    // Hangul decomposition reports .nfd for both requested forms.
    dc = n.decompose('\u{ace1}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);

    dc = n.decompose('\u{ace1}', .nfkd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);

    dc = n.decompose('\u{3d3}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]);

    dc = n.decompose('\u{3d3}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]);
}
|
||||
|
||||
/// Returned from various functions in this namespace. Remember to call `deinit` to free any allocated memory.
pub const Result = struct {
    // Null when `slice` borrows the caller's input (no allocation happened);
    // then deinit is a no-op.
    allocator: ?mem.Allocator = null,
    slice: []const u8,

    pub fn deinit(self: *const Result) void {
        if (self.allocator) |allocator| allocator.free(self.slice);
    }
};
|
||||
|
||||
// Compares code points by Canonical Combining Class order.
fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
    return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs);
}

// Applies the Canonical Sorting Algorithm.
// Sorts each maximal run of code points with non-zero CCC; starters
// (ccc == 0) act as boundaries and are never reordered.
fn canonicalSort(self: Self, cps: []u21) void {
    var i: usize = 0;
    while (i < cps.len) : (i += 1) {
        const start: usize = i;
        while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
        mem.sort(u21, cps[start..i], self, cccLess);
    }
}
|
||||
|
||||
/// Normalize `str` to NFD.
/// Call `Result.deinit` on the return value to free any allocated memory.
pub fn nfd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfd);
}

/// Normalize `str` to NFKD.
/// Call `Result.deinit` on the return value to free any allocated memory.
pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfkd);
}
|
||||
|
||||
/// Decomposes `str` to `form` (.nfd or .nfkd) and returns the resulting
/// code points in canonical order. Caller owns the returned slice.
pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();

    var cp_iter = CodePointIterator{ .bytes = str };
    var dc_buf: [18]u21 = undefined;

    while (cp_iter.next()) |cp| {
        const dc = self.decompose(cp.code, form, &dc_buf);
        if (dc.form == .same) {
            // No decomposition; keep the original code point.
            try dcp_list.append(cp.code);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }

    self.canonicalSort(dcp_list.items);

    return try dcp_list.toOwnedSlice();
}
|
||||
|
||||
// Decomposes `str` to `form` and re-encodes the result as UTF-8.
// ASCII-only input is returned as a borrowed slice (Result.allocator null).
fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };

    const dcps = try self.nfxdCodePoints(allocator, str, form);
    defer allocator.free(dcps);

    var dstr_list = std.ArrayList(u8).init(allocator);
    defer dstr_list.deinit();
    var buf: [4]u8 = undefined;

    for (dcps) |dcp| {
        // dcps came from valid decompositions, so encoding cannot fail.
        const len = unicode.utf8Encode(dcp, &buf) catch unreachable;
        try dstr_list.appendSlice(buf[0..len]);
    }

    return Result{ .allocator = allocator, .slice = try dstr_list.toOwnedSlice() };
}
|
||||
|
||||
// ASCII input takes the borrowed-slice fast path (no allocation).
test "nfd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfd(allocator, "Hello World!");
    defer result.deinit();

    try testing.expectEqualStrings("Hello World!", result.slice);
}

// Non-ASCII input allocates; é canonically decomposes, ϓ (U+03D3) keeps
// its canonical base under NFD.
test "nfd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();

    try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
}

test "nfkd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfkd(allocator, "Hello World!");
    defer result.deinit();

    try testing.expectEqualStrings("Hello World!", result.slice);
}

// Under NFKD, U+03D3's base compatibility-decomposes further to U+03A5.
test "nfkd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();

    try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
}
|
||||
|
||||
/// Canonically decomposes `cps` (NFD) and returns the resulting code
/// points in canonical order. Caller owns the returned slice.
pub fn nfdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var out = std.ArrayList(u21).init(allocator);
    defer out.deinit();

    // Scratch buffer for one code point's full decomposition.
    var scratch: [18]u21 = undefined;

    for (cps) |cp| {
        const decomp = self.decompose(cp, .nfd, &scratch);
        switch (decomp.form) {
            .same => try out.append(cp),
            else => try out.appendSlice(decomp.cps),
        }
    }

    self.canonicalSort(out.items);
    return try out.toOwnedSlice();
}
|
||||
|
||||
/// Compatibility-decomposes `cps` (NFKD) and returns the resulting code
/// points in canonical order. Caller owns the returned slice.
pub fn nfkdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var out = std.ArrayList(u21).init(allocator);
    defer out.deinit();

    // Scratch buffer for one code point's full decomposition.
    var scratch: [18]u21 = undefined;

    for (cps) |cp| {
        const decomp = self.decompose(cp, .nfkd, &scratch);
        switch (decomp.form) {
            .same => try out.append(cp),
            else => try out.appendSlice(decomp.cps),
        }
    }

    self.canonicalSort(out.items);
    return try out.toOwnedSlice();
}
|
||||
|
||||
// Composition (NFC, NFKC)
|
||||
|
||||
/// True if `cp` has a Hangul syllable type (jamo or precomposed syllable).
fn isHangul(self: Self, cp: u21) bool {
    // Nothing below the first jamo (U+1100) can be Hangul; skip the lookup.
    if (cp < 0x1100) return false;
    return self.norm_data.hangul_data.syllable(cp) != .none;
}
|
||||
|
||||
/// Normalizes `str` to NFC (canonical decomposition then composition).
/// Call `deinit` on the returned `Result`. For ASCII-only or Latin-1-only
/// input the quick checks in `nfxc` return `str` itself (no allocation).
pub fn nfc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfc);
}
|
||||
|
||||
/// Normalizes `str` to NFKC (compatibility decomposition then composition).
/// Call `deinit` on the returned `Result`. For ASCII-only input the quick
/// check in `nfxc` returns `str` itself (no allocation).
pub fn nfkc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfkc);
}
|
||||
|
||||
/// Shared implementation of NFC/NFKC: decompose, then repeatedly apply
/// the Unicode canonical composition algorithm until a full pass makes no
/// replacements, then UTF-8 encode the surviving code points.
fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks: already-composed subsets where the input can be
    // returned as-is, unallocated.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
    if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };

    // Decompose first (canonical for NFC, compatibility for NFKC).
    var dcps = if (form == .nfc)
        try self.nfxdCodePoints(allocator, str, .nfd)
    else
        try self.nfxdCodePoints(allocator, str, .nfkd);
    defer allocator.free(dcps);

    // Compose. Deleted code points are overwritten with a sentinel rather
    // than compacted, so indices stay stable within a pass.
    const tombstone = 0xe000; // Start of BMP Private Use Area

    // Loop over all decomposed code points until a pass deletes nothing.
    while (true) {
        var i: usize = 1; // start at second code point.
        var deleted: usize = 0;

        // For each code point, C, find the preceding
        // starter code point L, if any.
        block_check: while (i < dcps.len) : (i += 1) {
            const C = dcps[i];
            if (C == tombstone) continue :block_check;
            const cc_C = self.norm_data.ccc_data.ccc(C);
            var starter_index: ?usize = null;
            var j: usize = i;

            // Seek back to find starter L, if any.
            while (true) {
                j -= 1;
                if (dcps[j] == tombstone) continue;

                // Check for starter (combining class 0).
                if (self.norm_data.ccc_data.isStarter(dcps[j])) {
                    // Check for blocking conditions: any code point B
                    // between L and C with ccc(B) >= ccc(C) blocks the pair.
                    for (dcps[(j + 1)..i]) |B| {
                        if (B == tombstone) continue;
                        const cc_B = self.norm_data.ccc_data.ccc(B);
                        if (cc_B != 0 and self.isHangul(C)) continue :block_check;
                        if (cc_B >= cc_C) continue :block_check;
                    }

                    // Found starter at j.
                    starter_index = j;
                    break;
                }

                if (j == 0) break;
            }

            // If we have a starter L, see if there's a primary
            // composite, P, for the sequence L, C. If so, we must
            // replace L with P and delete C.
            if (starter_index) |sidx| {
                const L = dcps[sidx];
                var processed_hangul = false;

                // If L and C are Hangul syllables, we can compose
                // them algorithmically if possible.
                if (self.isHangul(L) and self.isHangul(C)) {
                    // Get Hangul syllable types.
                    const l_stype = self.norm_data.hangul_data.syllable(L);
                    const c_stype = self.norm_data.hangul_data.syllable(C);

                    if (l_stype == .LV and c_stype == .T) {
                        // LV, T canonical composition.
                        dcps[sidx] = composeHangulCanon(L, C);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }

                    if (l_stype == .L and c_stype == .V) {
                        // L, V full composition. L, V, T is handled via main loop.
                        dcps[sidx] = composeHangulFull(L, C, 0);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }

                    if (processed_hangul) deleted += 1;
                }

                // If no composition has occurred yet.
                if (!processed_hangul) {
                    // L, C are not Hangul, so check for primary composite
                    // in the Unicode Character Database.
                    if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| {
                        // We have a primary composite P for L, C.
                        // We must check if P is not in the Full
                        // Composition Exclusions (FCX) list,
                        // preventing it from appearing in any
                        // composed form (NFC, NFKC).
                        if (!self.norm_data.normp_data.isFcx(P)) {
                            dcps[sidx] = P;
                            dcps[i] = tombstone; // Mark for deletion.
                            deleted += 1;
                        }
                    }
                }
            }
        }

        // If we have no deletions, the code point sequence
        // has been fully composed; encode and return it.
        if (deleted == 0) {
            var cstr_list = std.ArrayList(u8).init(allocator);
            defer cstr_list.deinit();
            var buf: [4]u8 = undefined;

            for (dcps) |cp| {
                if (cp == tombstone) continue; // "Delete"
                // utf8Encode cannot fail here: every cp came from decoding
                // valid UTF-8 or from the composition tables.
                const len = unicode.utf8Encode(cp, &buf) catch unreachable;
                try cstr_list.appendSlice(buf[0..len]);
            }

            return Result{ .allocator = allocator, .slice = try cstr_list.toOwnedSlice() };
        }
    }
}
|
||||
|
||||
// U+03D2 + combining acute composes to the precomposed U+03D3 under NFC.
test "nfc" {
    var norm_data: NormData = undefined;
    try NormData.init(&norm_data, testing.allocator);
    defer norm_data.deinit();

    const normalizer = Self{ .norm_data = &norm_data };
    const res = try normalizer.nfc(testing.allocator, "Complex char: \u{3D2}\u{301}");
    defer res.deinit();

    try testing.expectEqualStrings("Complex char: \u{3D3}", res.slice);
}
|
||||
|
||||
// U+03A5 + combining acute composes to the precomposed U+038E under NFKC.
test "nfkc" {
    var norm_data: NormData = undefined;
    try NormData.init(&norm_data, testing.allocator);
    defer norm_data.deinit();

    const normalizer = Self{ .norm_data = &norm_data };
    const res = try normalizer.nfkc(testing.allocator, "Complex char: \u{03A5}\u{0301}");
    defer res.deinit();

    try testing.expectEqualStrings("Complex char: \u{038E}", res.slice);
}
|
||||
|
||||
/// Tests for canonical equivalence: normalizes both `a` and `b` to NFC
/// and compares the results byte-for-byte.
pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool {
    const norm_a = try self.nfc(allocator, a);
    defer norm_a.deinit();

    const norm_b = try self.nfc(allocator, b);
    defer norm_b.deinit();

    return mem.eql(u8, norm_a.slice, norm_b.slice);
}
|
||||
|
||||
// Precomposed and decomposed spellings of the same text must compare equal.
test "eql" {
    var norm_data: NormData = undefined;
    try NormData.init(&norm_data, testing.allocator);
    defer norm_data.deinit();

    const normalizer = Self{ .norm_data = &norm_data };
    try testing.expect(try normalizer.eql(testing.allocator, "foé", "foe\u{0301}"));
    try testing.expect(try normalizer.eql(testing.allocator, "foϓ", "fo\u{03D2}\u{0301}"));
}
|
||||
|
||||
/// Returns true if `str` only contains Latin-1 code points
/// (U+0000..U+00FF). Uses SIMD if possible.
///
/// Bug fix: the comparison was `> 256`, which wrongly admitted U+0100.
/// The bound is now 255, so only true Latin-1 passes. This is strictly
/// more conservative, so the NFC quick-check caller remains correct.
pub fn isLatin1Only(str: []const u8) bool {
    var cp_iter = CodePointIterator{ .bytes = str };

    // No SIMD support for u21 on this target: plain scalar scan.
    const vec_len = simd.suggestVectorLength(u21) orelse return while (cp_iter.next()) |cp| {
        if (cp.code > 255) break false;
    } else true;

    const Vec = @Vector(vec_len, u21);

    // Process the string a full vector of code points at a time.
    outer: while (true) {
        var v1: Vec = undefined;
        const saved_cp_i = cp_iter.i;

        for (0..vec_len) |i| {
            if (cp_iter.next()) |cp| {
                v1[i] = cp.code;
            } else {
                // Not enough code points left to fill a vector; rewind
                // and let the scalar tail below finish the job.
                cp_iter.i = saved_cp_i;
                break :outer;
            }
        }
        const v2: Vec = @splat(255);
        if (@reduce(.Or, v1 > v2)) return false;
    }

    // Scalar tail for the final, partial chunk.
    return while (cp_iter.next()) |cp| {
        if (cp.code > 255) break false;
    } else true;
}
|
||||
|
||||
// U+00FE/U+00FF are the top of Latin-1; U+03D3 is well outside it.
test "isLatin1Only" {
    try testing.expect(isLatin1Only("Hello, World! \u{fe} \u{ff}"));
    try testing.expect(!isLatin1Only("Héllo, World! \u{3d3}"));
}
|
||||
164
deps/zg/src/PropsData.zig
vendored
Normal file
164
deps/zg/src/PropsData.zig
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
allocator: mem.Allocator,
// Two-stage lookup tables (stage 1: code point high byte -> block offset;
// stage 2: one byte of property flag bits per code point), one pair per
// embedded data file:
// DerivedCoreProperties.txt
core_s1: []u16 = undefined,
core_s2: []u8 = undefined,
// PropList.txt
props_s1: []u16 = undefined,
props_s2: []u8 = undefined,
// DerivedNumericType.txt
num_s1: []u16 = undefined,
num_s2: []u8 = undefined,

const Self = @This();
|
||||
|
||||
/// Initializes the property tables by decompressing the embedded
/// UCD-derived data files. Caller must call `deinit` to free them.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    // Process DerivedCoreProperties.txt
    const core_bytes = @embedFile("core_props");
    var core_fbs = std.io.fixedBufferStream(core_bytes);
    var core_decomp = decompressor(.raw, core_fbs.reader());

    self.core_s1 = try readStage1(allocator, core_decomp.reader(), endian);
    errdefer allocator.free(self.core_s1);
    self.core_s2 = try readStage2(allocator, core_decomp.reader(), endian);
    errdefer allocator.free(self.core_s2);

    // Process PropList.txt
    const props_bytes = @embedFile("props");
    var props_fbs = std.io.fixedBufferStream(props_bytes);
    var props_decomp = decompressor(.raw, props_fbs.reader());

    self.props_s1 = try readStage1(allocator, props_decomp.reader(), endian);
    errdefer allocator.free(self.props_s1);
    self.props_s2 = try readStage2(allocator, props_decomp.reader(), endian);
    errdefer allocator.free(self.props_s2);

    // Process DerivedNumericType.txt
    const num_bytes = @embedFile("numeric");
    var num_fbs = std.io.fixedBufferStream(num_bytes);
    var num_decomp = decompressor(.raw, num_fbs.reader());

    self.num_s1 = try readStage1(allocator, num_decomp.reader(), endian);
    errdefer allocator.free(self.num_s1);
    self.num_s2 = try readStage2(allocator, num_decomp.reader(), endian);
    errdefer allocator.free(self.num_s2);

    return self;
}

// Reads a u16 length followed by that many u16 entries (a stage-1 index table).
// Caller owns the returned slice.
fn readStage1(allocator: mem.Allocator, reader: anytype, endian: std.builtin.Endian) ![]u16 {
    const len: u16 = try reader.readInt(u16, endian);
    const table = try allocator.alloc(u16, len);
    errdefer allocator.free(table);
    for (table) |*entry| entry.* = try reader.readInt(u16, endian);
    return table;
}

// Reads a u16 length followed by that many raw bytes (a stage-2 data table).
// Bug fix: the original used `_ = try reader.readAll(...)`, silently
// accepting a short read and leaving the tail of the table uninitialized;
// a truncated stream now fails with error.EndOfStream.
fn readStage2(allocator: mem.Allocator, reader: anytype, endian: std.builtin.Endian) ![]u8 {
    const len: u16 = try reader.readInt(u16, endian);
    const table = try allocator.alloc(u8, len);
    errdefer allocator.free(table);
    try reader.readNoEof(table);
    return table;
}
|
||||
|
||||
/// Frees every table allocated by `init`.
pub fn deinit(self: *const Self) void {
    inline for (.{
        self.core_s1,
        self.core_s2,
        self.props_s1,
        self.props_s2,
        self.num_s1,
        self.num_s2,
    }) |table| self.allocator.free(table);
}
|
||||
|
||||
// All property predicates share the same two-stage table walk: stage 1
// maps the code point's high byte to a block offset, stage 2 holds one
// byte of property flag bits per code point within that block. The walk
// is factored into one helper per data file.

// Flag bits from DerivedCoreProperties.txt.
fn coreFlags(self: Self, cp: u21) u8 {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)];
}

// Flag bits from PropList.txt.
fn propListFlags(self: Self, cp: u21) u8 {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)];
}

// Flag bits from DerivedNumericType.txt.
fn numFlags(self: Self, cp: u21) u8 {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)];
}

/// True if `cp` is a mathematical symbol.
pub fn isMath(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 1 == 1;
}

/// True if `cp` is an alphabetic character.
pub fn isAlphabetic(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 2 == 2;
}

/// True if `cp` is a valid identifier start character.
pub fn isIdStart(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 4 == 4;
}

/// True if `cp` is a valid identifier continuation character.
pub fn isIdContinue(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 8 == 8;
}

/// True if `cp` is a valid extended identifier start character.
pub fn isXidStart(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 16 == 16;
}

/// True if `cp` is a valid extended identifier continuation character.
pub fn isXidContinue(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 32 == 32;
}

/// True if `cp` is a whitespace character.
pub fn isWhitespace(self: Self, cp: u21) bool {
    return self.propListFlags(cp) & 1 == 1;
}

/// True if `cp` is a hexadecimal digit.
pub fn isHexDigit(self: Self, cp: u21) bool {
    return self.propListFlags(cp) & 2 == 2;
}

/// True if `cp` is a diacritic mark.
pub fn isDiacritic(self: Self, cp: u21) bool {
    return self.propListFlags(cp) & 4 == 4;
}

/// True if `cp` is numeric.
pub fn isNumeric(self: Self, cp: u21) bool {
    return self.numFlags(cp) & 1 == 1;
}

/// True if `cp` is a digit.
pub fn isDigit(self: Self, cp: u21) bool {
    return self.numFlags(cp) & 2 == 2;
}

/// True if `cp` is decimal.
pub fn isDecimal(self: Self, cp: u21) bool {
    return self.numFlags(cp) & 4 == 4;
}
|
||||
|
||||
test "Props" {
    const pd = try init(testing.allocator);
    defer pd.deinit();

    // PropList.txt: hex digits.
    try testing.expect(pd.isHexDigit('F'));
    try testing.expect(pd.isHexDigit('a'));
    try testing.expect(pd.isHexDigit('8'));
    try testing.expect(!pd.isHexDigit('z'));

    // Core / PropList properties.
    try testing.expect(pd.isDiacritic('\u{301}'));
    try testing.expect(pd.isAlphabetic('A'));
    try testing.expect(!pd.isAlphabetic('3'));
    try testing.expect(pd.isMath('+'));

    // Numeric types are mutually exclusive per code point: '1'..'3' are
    // Decimal, so they are neither Numeric nor Digit.
    try testing.expect(pd.isNumeric('\u{277f}'));
    try testing.expect(pd.isDigit('\u{2070}'));
    try testing.expect(pd.isDecimal('3'));
    try testing.expect(!pd.isNumeric('1'));
    try testing.expect(!pd.isDigit('2'));
    try testing.expect(!pd.isDecimal('g'));
}
|
||||
228
deps/zg/src/ScriptsData.zig
vendored
Normal file
228
deps/zg/src/ScriptsData.zig
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
/// Scripts
///
/// NOTE(review): `none` must stay first — ordinal 0 means "no script" in
/// `script()` — and the remaining variants must keep this exact order,
/// since the embedded data file stores scripts as ordinals into this enum
/// (`@enumFromInt` in `script()`).
pub const Script = enum {
    none,
    // zig fmt: off
    Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian,
    Avestan, Balinese, Bamum, Bassa_Vah, Batak,
    Bengali, Bhaiksuki, Bopomofo, Brahmi, Braille,
    Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Albanian,
    Chakma, Cham, Cherokee, Chorasmian, Common,
    Coptic, Cuneiform, Cypriot, Cypro_Minoan, Cyrillic,
    Deseret, Devanagari, Dives_Akuru, Dogra, Duployan,
    Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian,
    Glagolitic, Gothic, Grantha, Greek, Gujarati,
    Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
    Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic,
    Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi,
    Kannada, Katakana, Kawi, Kayah_Li, Kharoshthi,
    Khitan_Small_Script, Khmer, Khojki, Khudawadi, Lao,
    Latin, Lepcha, Limbu, Linear_A, Linear_B,
    Lisu, Lycian, Lydian, Mahajani, Makasar,
    Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
    Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hieroglyphs,
    Miao, Modi, Mongolian, Mro, Multani,
    Myanmar, Nabataean, Nag_Mundari, Nandinagari, New_Tai_Lue,
    Newa, Nko, Nushu, Nyiakeng_Puachue_Hmong, Ogham,
    Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic,
    Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Old_Uyghur,
    Oriya, Osage, Osmanya, Pahawh_Hmong, Palmyrene,
    Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang,
    Runic, Samaritan, Saurashtra, Sharada, Shavian,
    Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng,
    Soyombo, Sundanese, Syloti_Nagri, Syriac, Tagalog,
    Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet, Takri,
    Tamil, Tangsa, Tangut, Telugu, Thaana,
    Thai, Tibetan, Tifinagh, Tirhuta, Toto,
    Ugaritic, Vai, Vithkuqi, Wancho, Warang_Citi,
    Yezidi, Yi, Zanabazar_Square,
    // zig fmt: on
};
|
||||
|
||||
allocator: mem.Allocator,
// Three-stage lookup (see `script()`): s1 maps (cp >> 8) to a block
// offset, s2 maps the code point within the block to an index, and s3
// maps that index to a `Script` ordinal (0 = no script).
s1: []u16 = undefined,
s2: []u8 = undefined,
s3: []u8 = undefined,

const Self = @This();
|
||||
|
||||
/// Initializes the script lookup tables by decompressing the embedded
/// data file. Caller must call `deinit` to free them.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    // Stage 1: u16 length followed by that many u16 block offsets.
    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    // Stage 2: u16 length followed by raw index bytes.
    // NOTE(review): the result of readAll is discarded, so a short read
    // would go unnoticed — presumably the embedded data is always
    // complete; confirm against the data generator.
    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(self.s2);
    _ = try reader.readAll(self.s2);

    // Stage 3: a single-byte length (the script table has fewer than 256
    // entries), then raw Script ordinals.
    // NOTE(review): readInt(u8, ...) here differs from the u16 lengths
    // above — it looks deliberate, but confirm it matches the on-disk
    // format produced by the generator before changing anything.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    _ = try reader.readAll(self.s3);

    return self;
}
|
||||
|
||||
/// Releases all three lookup stages allocated by `init`.
pub fn deinit(self: *const Self) void {
    inline for (.{ self.s1, self.s2, self.s3 }) |stage| self.allocator.free(stage);
}
|
||||
|
||||
/// Lookup the Script type for `cp`. Returns null when the data assigns
/// no script (ordinal 0) to the code point.
pub fn script(self: Self, cp: u21) ?Script {
    const ordinal = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]];
    return if (ordinal == 0) null else @enumFromInt(ordinal);
}
|
||||
|
||||
test "script" {
    const sd = try init(std.testing.allocator);
    defer sd.deinit();

    try testing.expectEqual(Script.Latin, sd.script('A').?);
}
|
||||
84
deps/zg/src/WidthData.zig
vendored
Normal file
84
deps/zg/src/WidthData.zig
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
|
||||
const GraphemeData = @import("GraphemeData");
|
||||
|
||||
allocator: mem.Allocator,
// Grapheme cluster break data; owned by this struct (freed in `deinit`).
g_data: GraphemeData,
// Two-stage width lookup: s1 maps (cp >> 8) to a block offset, s2 holds
// the width in cells (-1..3) for each code point within the block.
s1: []u16 = undefined,
s2: []i3 = undefined,

const Self = @This();
|
||||
|
||||
/// Initializes the width tables (and the owned `GraphemeData`) by
/// decompressing the embedded data file. Caller must call `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .g_data = try GraphemeData.init(allocator),
    };
    errdefer self.g_data.deinit();

    // Stage 1: u16 length followed by that many u16 block offsets.
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    // Stage 2: u16 length followed by i8 widths narrowed to i3.
    // NOTE(review): @intCast is a safety-checked assertion that each stored
    // width fits in i3 (-4..3) — presumably the generator guarantees the
    // -1..3 range documented on `codePointWidth`; confirm.
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(i3, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian));

    return self;
}
|
||||
|
||||
/// Frees the width tables and the owned grapheme data.
pub fn deinit(self: *const Self) void {
    self.g_data.deinit();
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}
|
||||
|
||||
/// codePointWidth returns the number of cells `cp` requires when rendered
/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to
/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1
/// control codes return 0.
/// NOTE(review): an earlier revision of this comment described a `cjk`
/// parameter for ambiguous-width handling; no such parameter exists —
/// ambiguous code points resolve to whatever the embedded table encodes
/// (1, per the "ambiguous halfwidth" case in the test below).
pub fn codePointWidth(self: Self, cp: u21) i3 {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
}
|
||||
|
||||
test "codePointWidth" {
    // Bug fix: `codePointWidth` is a method taking `self`; the previous
    // version called it as a free function and could not compile.
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0000)); // null
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x8)); // \b
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x7f)); // DEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0005)); // Cf
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0007)); // \a BEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000A)); // \n LF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000B)); // \v VT
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000C)); // \f FF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000D)); // \r CR
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000E)); // SO
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000F)); // SI

    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x070F)); // Cf
    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x0603)); // Cf Arabic

    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00AD)); // soft-hyphen
    try testing.expectEqual(@as(i3, 2), self.codePointWidth(0x2E3A)); // two-em dash
    try testing.expectEqual(@as(i3, 3), self.codePointWidth(0x2E3B)); // three-em dash

    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00BD)); // ambiguous halfwidth

    try testing.expectEqual(@as(i3, 1), self.codePointWidth('é'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('😊'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('统'));
}
|
||||
33
deps/zg/src/ascii.zig
vendored
Normal file
33
deps/zg/src/ascii.zig
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
const std = @import("std");
|
||||
const simd = std.simd;
|
||||
const testing = std.testing;
|
||||
|
||||
/// Returns true if `str` only contains ASCII bytes. Uses SIMD if possible.
pub fn isAsciiOnly(str: []const u8) bool {
    // Scalar fallback when the target has no suitable vector size.
    const vec_len = simd.suggestVectorLength(u8) orelse return for (str) |b| {
        if (b > 127) break false;
    } else true;

    const Vec = @Vector(vec_len, u8);
    var rest = str;

    // Scan a full vector's worth of bytes at a time.
    while (rest.len >= vec_len) {
        const chunk: Vec = rest[0..vec_len].*;
        const limit: Vec = @splat(127);
        if (@reduce(.Or, chunk > limit)) return false;
        rest = rest[vec_len..];
    }

    // Scalar tail (fewer than vec_len bytes remain).
    for (rest) |b| {
        if (b > 127) return false;
    }
    return true;
}
|
||||
|
||||
test "isAsciiOnly" {
    try testing.expect(isAsciiOnly("Hello, World! 0123456789 !@#$%^&*()_-=+"));
    // A single non-ASCII byte anywhere must flip the result.
    try testing.expect(!isAsciiOnly("Héllo, World! 0123456789 !@#$%^&*()_-=+"));
}
|
||||
118
deps/zg/src/code_point.zig
vendored
Normal file
118
deps/zg/src/code_point.zig
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// `CodePoint` represents a Unicode code point by its code,
/// length, and offset in the source bytes.
pub const CodePoint = struct {
    code: u21, // decoded scalar value (U+FFFD for malformed input)
    len: u3, // number of source bytes consumed (1-4)
    offset: u32, // byte offset of this code point in the source
};

/// Decodes the code point at the start of `bytes`; `offset` is recorded
/// verbatim in the result. Returns null for empty input. An invalid lead
/// byte or a truncated sequence yields U+FFFD with len 1; continuation
/// bytes are NOT validated (the input is assumed to be UTF-8).
pub fn decode(bytes: []const u8, offset: u32) ?CodePoint {
    // EOS fast path.
    if (bytes.len == 0) return null;

    const lead = bytes[0];

    // ASCII fast path.
    if (lead < 0x80) return .{ .code = lead, .len = 1, .offset = offset };

    // Determine the sequence length from the lead byte's high bits.
    const seq_len: u3 = switch (lead) {
        0b1100_0000...0b1101_1111 => 2,
        0b1110_0000...0b1110_1111 => 3,
        0b1111_0000...0b1111_0111 => 4,
        // Stray continuation byte or out-of-range lead: replacement char,
        // consuming a single byte.
        else => return .{ .code = 0xfffd, .len = 1, .offset = offset },
    };

    // Truncated sequence at end of input: replacement char, one byte.
    if (seq_len > bytes.len) return .{ .code = 0xfffd, .len = 1, .offset = offset };

    // Accumulate payload bits: the lead byte contributes its low bits,
    // each continuation byte contributes its low six bits.
    var code: u21 = switch (seq_len) {
        2 => lead & 0b0001_1111,
        3 => lead & 0b0000_1111,
        else => lead & 0b0000_0111,
    };
    for (bytes[1..seq_len]) |cont| {
        code = (code << 6) | (cont & 0b0011_1111);
    }

    return .{ .code = code, .len = seq_len, .offset = offset };
}
|
||||
|
||||
/// `Iterator` iterates a string one `CodePoint` at-a-time.
pub const Iterator = struct {
    bytes: []const u8,
    i: u32 = 0,

    /// Returns the next code point and advances the iterator,
    /// or null at end of input.
    pub fn next(self: *Iterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;

        const cp = decode(self.bytes[self.i..], self.i) orelse return null;
        self.i += cp.len;
        return cp;
    }

    /// Returns the next code point without advancing the iterator.
    pub fn peek(self: *Iterator) ?CodePoint {
        const mark = self.i;
        defer self.i = mark;
        return self.next();
    }
};
|
||||
|
||||
test "decode" {
    // U+1F329 CLOUD WITH LIGHTNING followed by a variation selector;
    // decode only consumes the first code point (4 bytes).
    const res = decode("🌩️", 0) orelse return error.TestUnexpectedResult;
    try std.testing.expectEqual(@as(u21, 0x1F329), res.code);
    try std.testing.expectEqual(4, res.len);
}
|
||||
|
||||
test "peek" {
    var iter = Iterator{ .bytes = "Hi" };

    const first = iter.next().?;
    try std.testing.expectEqual(@as(u21, 'H'), first.code);

    // peek must not advance: 'i' is observed twice.
    const peeked = iter.peek().?;
    const consumed = iter.next().?;
    try std.testing.expectEqual(@as(u21, 'i'), peeked.code);
    try std.testing.expectEqual(@as(u21, 'i'), consumed.code);

    // Exhausted: both peek and next return null.
    try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
    try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
}
|
||||
258
deps/zg/src/grapheme.zig
vendored
Normal file
258
deps/zg/src/grapheme.zig
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const unicode = std.unicode;
|
||||
|
||||
const CodePoint = @import("code_point").CodePoint;
|
||||
const CodePointIterator = @import("code_point").Iterator;
|
||||
pub const GraphemeData = @import("GraphemeData");
|
||||
|
||||
/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
pub const Grapheme = struct {
    len: u8,
    offset: u32,

    /// `bytes` returns the slice of bytes that correspond to
    /// this grapheme cluster in `src`.
    pub fn bytes(self: Grapheme, src: []const u8) []const u8 {
        return src[self.offset .. self.offset + self.len];
    }
};
|
||||
|
||||
/// `Iterator` iterates a string of UTF-8 encoded bytes one grapheme cluster at-a-time.
pub const Iterator = struct {
    // Two-code-point lookahead window: buf[0] is the current code point,
    // buf[1] the next one (null at end of input).
    buf: [2]?CodePoint = .{ null, null },
    cp_iter: CodePointIterator,
    data: *const GraphemeData,

    const Self = @This();

    /// Assumes `src` is valid UTF-8.
    pub fn init(str: []const u8, data: *const GraphemeData) Self {
        var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
        // Prime the lookahead so the first `next()` sees buf[0]/buf[1].
        self.advance();
        return self;
    }

    // Slides the lookahead window forward by one code point.
    fn advance(self: *Self) void {
        self.buf[0] = self.buf[1];
        self.buf[1] = self.cp_iter.next();
    }

    /// Returns the next grapheme cluster, or null at end of input.
    pub fn next(self: *Self) ?Grapheme {
        self.advance();

        // If no more
        if (self.buf[0] == null) return null;
        // If last one
        if (self.buf[1] == null) return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        // If ASCII: a break always follows, except after CR (CRLF is one
        // cluster), so the full break algorithm can be skipped.
        if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) {
            return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        }

        const gc_start = self.buf[0].?.offset;
        // NOTE(review): gc_len is u8, so a single cluster longer than 255
        // bytes would overflow — presumably out of scope; confirm upstream.
        var gc_len: u8 = self.buf[0].?.len;
        var state = State{};

        if (graphemeBreak(
            self.buf[0].?.code,
            self.buf[1].?.code,
            self.data,
            &state,
        )) return Grapheme{ .len = gc_len, .offset = gc_start };

        // Keep absorbing code points until the break algorithm says the
        // cluster ends (or input runs out).
        while (true) {
            self.advance();
            if (self.buf[0] == null) break;

            gc_len += self.buf[0].?.len;

            if (graphemeBreak(
                self.buf[0].?.code,
                if (self.buf[1]) |ncp| ncp.code else 0,
                self.data,
                &state,
            )) break;
        }

        return Grapheme{ .len = gc_len, .offset = gc_start };
    }
};
|
||||
|
||||
// Predicates
// True if `cp` unconditionally terminates a grapheme cluster:
// CR, LF, or a Control grapheme-break property.
fn isBreaker(cp: u21, data: *const GraphemeData) bool {
    if (cp == '\x0d' or cp == '\x0a') return true;
    return data.gbp(cp) == .Control;
}
|
||||
|
||||
// Grapheme break state.
pub const State = struct {
    // Bit flags for in-progress multi-code-point sequences:
    // bit 0 = Extended Pictographic (emoji), bit 1 = Regional Indicator,
    // bit 2 = Indic Conjunct.
    bits: u3 = 0,

    const xpic_bit: u3 = 1;
    const regional_bit: u3 = 2;
    const indic_bit: u3 = 4;

    // Extended Pictographic (emoji)
    fn hasXpic(self: State) bool {
        return self.bits & xpic_bit != 0;
    }
    fn setXpic(self: *State) void {
        self.bits |= xpic_bit;
    }
    fn unsetXpic(self: *State) void {
        // AND-NOT clears the bit unconditionally. The previous XOR toggled,
        // which would *set* the bit if called while it was already clear;
        // this makes unset idempotent and safe from any call site.
        self.bits &= ~xpic_bit;
    }

    // Regional Indicator (flags)
    fn hasRegional(self: State) bool {
        return self.bits & regional_bit != 0;
    }
    fn setRegional(self: *State) void {
        self.bits |= regional_bit;
    }
    fn unsetRegional(self: *State) void {
        self.bits &= ~regional_bit;
    }

    // Indic Conjunct
    fn hasIndic(self: State) bool {
        return self.bits & indic_bit != 0;
    }
    fn setIndic(self: *State) void {
        self.bits |= indic_bit;
    }
    fn unsetIndic(self: *State) void {
        self.bits &= ~indic_bit;
    }
};
|
||||
|
||||
/// `graphemeBreak` returns true only if a grapheme break point is required
/// between `cp1` and `cp2`. `state` should start out as 0. If calling
/// iteratively over a sequence of code points, this function must be called
/// IN ORDER on ALL potential breaks in a string.
/// Modeled after the API of utf8proc's `utf8proc_grapheme_break_stateful`.
/// https://github.com/JuliaStrings/utf8proc/blob/2bbb1ba932f727aad1fab14fafdbc89ff9dc4604/utf8proc.h#L599-L617
pub fn graphemeBreak(
    cp1: u21,
    cp2: u21,
    data: *const GraphemeData,
    state: *State,
) bool {
    // Extract relevant properties.
    const cp1_gbp_prop = data.gbp(cp1);
    const cp1_indic_prop = data.indic(cp1);
    const cp1_is_emoji = data.isEmoji(cp1);

    const cp2_gbp_prop = data.gbp(cp2);
    const cp2_indic_prop = data.indic(cp2);
    const cp2_is_emoji = data.isEmoji(cp2);

    // GB11: record that an Emoji (Extended Pictographic) sequence has begun.
    if (!state.hasXpic() and cp1_is_emoji) state.setXpic();
    // GB9c: record that an Indic Conjunct sequence (Consonant) has begun.
    if (!state.hasIndic() and cp1_indic_prop == .Consonant) state.setIndic();

    // GB3: CR x LF
    if (cp1 == '\r' and cp2 == '\n') return false;

    // GB4: always break after CR, LF, or Control.
    if (isBreaker(cp1, data)) return true;

    // GB11: Emoji Extend* ZWJ x Emoji
    if (state.hasXpic() and
        cp1_gbp_prop == .ZWJ and
        cp2_is_emoji)
    {
        state.unsetXpic();
        return false;
    }

    // GB9: x (Extend | ZWJ)
    if (cp2_gbp_prop == .Extend or cp2_gbp_prop == .ZWJ) return false;

    // GB9a: x SpacingMark
    if (cp2_gbp_prop == .SpacingMark) return false;

    // GB9b: Prepend x
    if (cp1_gbp_prop == .Prepend and !isBreaker(cp2, data)) return false;

    // GB12, GB13: RI x RI — join regional indicators pairwise; the state bit
    // alternates so every second RI starts a new cluster.
    if (cp1_gbp_prop == .Regional_Indicator and cp2_gbp_prop == .Regional_Indicator) {
        if (state.hasRegional()) {
            state.unsetRegional();
            return true;
        } else {
            state.setRegional();
            return false;
        }
    }

    // GB6: Hangul L x (L|V|LV|VT)
    if (cp1_gbp_prop == .L) {
        if (cp2_gbp_prop == .L or
            cp2_gbp_prop == .V or
            cp2_gbp_prop == .LV or
            cp2_gbp_prop == .LVT) return false;
    }

    // GB7: Hangul (LV | V) x (V | T)
    if (cp1_gbp_prop == .LV or cp1_gbp_prop == .V) {
        if (cp2_gbp_prop == .V or
            cp2_gbp_prop == .T) return false;
    }

    // GB8: Hangul (LVT | T) x T
    if (cp1_gbp_prop == .LVT or cp1_gbp_prop == .T) {
        if (cp2_gbp_prop == .T) return false;
    }

    // GB9c: Indic Conjunct Break — Consonant x (Extend | Linker)
    if (state.hasIndic() and
        cp1_indic_prop == .Consonant and
        (cp2_indic_prop == .Extend or cp2_indic_prop == .Linker))
    {
        return false;
    }

    // GB9c: Extend x Linker within an Indic sequence.
    if (state.hasIndic() and
        cp1_indic_prop == .Extend and
        cp2_indic_prop == .Linker)
    {
        return false;
    }

    // GB9c: (Linker | ZWJ) x Consonant completes the conjunct; reset state.
    if (state.hasIndic() and
        (cp1_indic_prop == .Linker or cp1_gbp_prop == .ZWJ) and
        cp2_indic_prop == .Consonant)
    {
        state.unsetIndic();
        return false;
    }

    // GB999: break everywhere else.
    return true;
}
|
||||
|
||||
test "Segmentation ZWJ and ZWSP emoji sequences" {
    // Two identical emoji ZWJ sequences, joined in three different ways.
    const seq_1 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const seq_2 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const with_zwj = seq_1 ++ "\u{200D}" ++ seq_2;
    const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
    const no_joiner = seq_1 ++ seq_2;

    const data = try GraphemeData.init(std.testing.allocator);
    defer data.deinit();

    // A ZWJ between the sequences fuses everything into one cluster.
    var iter = Iterator.init(with_zwj, &data);
    var count: usize = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 1), count);

    // A ZWSP does not join: each sequence plus the ZWSP itself = 3 clusters.
    iter = Iterator.init(with_zwsp, &data);
    count = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 3), count);

    // With no joiner, the two sequences are two separate clusters.
    iter = Iterator.init(no_joiner, &data);
    count = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 2), count);
}
|
||||
195
deps/zg/src/unicode_tests.zig
vendored
Normal file
195
deps/zg/src/unicode_tests.zig
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
const std = @import("std");
|
||||
const fmt = std.fmt;
|
||||
const fs = std.fs;
|
||||
const io = std.io;
|
||||
const heap = std.heap;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
const unicode = std.unicode;
|
||||
|
||||
const Grapheme = @import("grapheme").Grapheme;
|
||||
const GraphemeData = @import("grapheme").GraphemeData;
|
||||
const GraphemeIterator = @import("grapheme").Iterator;
|
||||
const Normalize = @import("Normalize");
|
||||
|
||||
test "Unicode normalization tests" {
    var arena = heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Local helper: decode a test-file field of space-separated hex code
    // points (e.g. "0044 0307") into freshly-allocated UTF-8 bytes.
    // Caller owns the returned slice. Replaces five identical copies of
    // this parsing loop in the original.
    const Decode = struct {
        fn utf8FromCodePoints(alloc: mem.Allocator, field: []const u8) ![]u8 {
            var bytes = std.ArrayList(u8).init(alloc);
            defer bytes.deinit();
            var cp_buf: [4]u8 = undefined;
            var cps = mem.split(u8, field, " ");
            while (cps.next()) |s| {
                const cp = try fmt.parseInt(u21, s, 16);
                const len = try unicode.utf8Encode(cp, &cp_buf);
                try bytes.appendSlice(cp_buf[0..len]);
            }
            return bytes.toOwnedSlice();
        }
    };

    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    const n = Normalize{ .norm_data = &norm_data };

    var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
    defer file.close();
    var buf_reader = io.bufferedReader(file.reader());
    const input_stream = buf_reader.reader();

    var line_no: usize = 0;
    var buf: [4096]u8 = undefined;

    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        line_no += 1;
        // Skip comments, empty lines, and "@Part" section headers.
        if (line.len == 0 or line[0] == '#' or line[0] == '@') continue;

        // Field layout per line: source;NFC;NFD;NFKC;NFKD;...
        var fields = mem.split(u8, line, ";");
        var field_index: usize = 0;
        var input: []u8 = undefined;
        defer allocator.free(input);

        while (fields.next()) |field| : (field_index += 1) {
            if (field_index == 0) {
                // Source string for this test case; normalized below.
                input = try Decode.utf8FromCodePoints(allocator, field);
            } else if (field_index == 1) {
                // NFC, time to test.
                const want = try Decode.utf8FromCodePoints(allocator, field);
                defer allocator.free(want);

                var got = try n.nfc(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 2) {
                // NFD, time to test.
                const want = try Decode.utf8FromCodePoints(allocator, field);
                defer allocator.free(want);

                var got = try n.nfd(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 3) {
                // NFKC, time to test.
                const want = try Decode.utf8FromCodePoints(allocator, field);
                defer allocator.free(want);

                var got = try n.nfkc(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 4) {
                // NFKD, time to test.
                const want = try Decode.utf8FromCodePoints(allocator, field);
                defer allocator.free(want);

                const got = try n.nfkd(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else {
                // Remaining fields (comments etc.) are not checked.
                continue;
            }
        }
    }
}
|
||||
|
||||
test "Segmentation GraphemeIterator" {
    const allocator = std.testing.allocator;
    var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
    defer file.close();
    var buf_reader = std.io.bufferedReader(file.reader());
    var input_stream = buf_reader.reader();

    const data = try GraphemeData.init(allocator);
    defer data.deinit();

    var buf: [4096]u8 = undefined;
    var line_no: usize = 1;

    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) {
        // Skip comments or empty lines.
        if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue;

        // Clean up: strip the leading "÷ " break marker and everything from
        // the trailing " ÷\t#" (the per-line comment) onward.
        var line = std.mem.trimLeft(u8, raw, "÷ ");
        if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| {
            line = line[0..octo];
        }
        // Iterate over fields: each " ÷ "-separated field is one expected
        // grapheme cluster.
        var want = std.ArrayList(Grapheme).init(allocator);
        defer want.deinit();

        // The full test string, built from all code points on the line.
        var all_bytes = std.ArrayList(u8).init(allocator);
        defer all_bytes.deinit();

        var graphemes = std.mem.split(u8, line, " ÷ ");
        var bytes_index: u32 = 0;

        while (graphemes.next()) |field| {
            var code_points = std.mem.split(u8, field, " ");
            var cp_buf: [4]u8 = undefined;
            var cp_index: u32 = 0;
            var gc_len: u8 = 0;

            while (code_points.next()) |code_point| {
                // "×" marks "no break here" between code points; it carries
                // no code point of its own.
                if (std.mem.eql(u8, code_point, "×")) continue;
                const cp: u21 = try std.fmt.parseInt(u21, code_point, 16);
                const len = try unicode.utf8Encode(cp, &cp_buf);
                try all_bytes.appendSlice(cp_buf[0..len]);
                cp_index += len;
                gc_len += len;
            }

            try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
            bytes_index += cp_index;
        }

        // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items });
        var iter = GraphemeIterator.init(all_bytes.items, &data);

        // Check: the iterator must yield exactly the expected clusters,
        // byte-for-byte.
        for (want.items) |want_gc| {
            const got_gc = (iter.next()).?;
            try std.testing.expectEqualStrings(
                want_gc.bytes(all_bytes.items),
                got_gc.bytes(all_bytes.items),
            );
        }
    }
}
|
||||
Reference in New Issue
Block a user