I will never get tired of vendoring dependencies. ha ha. It is possible
I am insane. I had to do a lot of pruning to get these not to be
ridiculous (especially the unicode data, which had nearly 1 million
lines of... stuff).
This commit is contained in:
2024-08-09 17:32:06 -07:00
commit 7692cb4bc7
155 changed files with 206515 additions and 0 deletions

21
deps/zg/LICENSE vendored Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Jose Colon Rodriguez
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

538
deps/zg/README.md vendored Normal file
View File

@@ -0,0 +1,538 @@
# zg
zg provides Unicode text processing for Zig projects.
## Unicode Version
The Unicode version supported by zg is 15.1.0.
## Zig Version
The minimum Zig version required is 0.13.0 stable.
## Integrating zg into your Zig Project
You first need to add zg as a dependency in your `build.zig.zon` file. In your
Zig project's root directory, run:
```plain
zig fetch --save https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz
```
Then instantiate the dependency in your `build.zig`:
```zig
const zg = b.dependency("zg", .{});
```
## A Modular Approach
zg is a modular library. This approach minimizes binary file size and memory
requirements by only including the Unicode data required for the specified module.
The following sections describe the various modules and their specific use case.
## Code Points
In the `code_point` module, you'll find a data structure representing a single code
point, `CodePoint`, and an `Iterator` to iterate over the code points in a string.
In your `build.zig`:
```zig
exe.root_module.addImport("code_point", zg.module("code_point"));
```
In your code:
```zig
const code_point = @import("code_point");
test "Code point iterator" {
const str = "Hi 😊";
var iter = code_point.Iterator{ .bytes = str };
var i: usize = 0;
while (iter.next()) |cp| : (i += 1) {
// The `code` field is the actual code point scalar as a `u21`.
if (i == 0) try expect(cp.code == 'H');
if (i == 1) try expect(cp.code == 'i');
if (i == 2) try expect(cp.code == ' ');
if (i == 3) {
try expect(cp.code == '😊');
// The `offset` field is the byte offset in the
// source string.
try expect(cp.offset == 3);
// The `len` field is the length in bytes of the
// code point in the source string.
try expect(cp.len == 4);
}
}
}
```
## Grapheme Clusters
Many characters are composed from more than one code point. These are known as
Grapheme Clusters and the `grapheme` module has a data structure to represent
them, `Grapheme`, and an `Iterator` to iterate over them in a string.
In your `build.zig`:
```zig
exe.root_module.addImport("grapheme", zg.module("grapheme"));
```
In your code:
```zig
const grapheme = @import("grapheme");
test "Grapheme cluster iterator" {
// we need some Unicode data to process Grapheme Clusters.
const gd = try grapheme.GraphemeData.init(allocator);
defer gd.deinit();
const str = "He\u{301}"; // Hé
var iter = grapheme.Iterator.init(str, &gd);
var i: usize = 0;
while (iter.next()) |gc| : (i += 1) {
// The `len` field is the length in bytes of the
// grapheme cluster in the source string.
if (i == 0) try expect(gc.len == 1);
if (i == 1) {
try expect(gc.len == 3);
// The `offset` in bytes of the grapheme cluster
// in the source string.
try expect(gc.offset == 1);
// The `bytes` method returns the slice of bytes
// that comprise this grapheme cluster in the
// source string `str`.
try expectEqualStrings("e\u{301}", gc.bytes(str));
}
}
}
```
## Unicode General Categories
To detect the general category for a code point, use the `GenCatData` module.
In your `build.zig`:
```zig
exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
```
In your code:
```zig
const GenCatData = @import("GenCatData");
test "General Category" {
const gcd = try GenCatData.init(allocator);
defer gcd.deinit();
// The `gc` method returns the abbreviated General Category.
// These abbreviations and descriptive comments can be found
// in the source file `src/GenCatData.zig` as an enum.
try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter
try expect(gcd.gc('3') == .Nd); // Nd: decimal number
// The following are convenience methods for groups of General
// Categories. For example, all letter categories start with `L`:
// Lu, Ll, Lt, Lo.
try expect(gcd.isControl(0));
try expect(gcd.isLetter('z'));
try expect(gcd.isMark('\u{301}'));
try expect(gcd.isNumber('3'));
try expect(gcd.isPunctuation('['));
try expect(gcd.isSeparator(' '));
try expect(gcd.isSymbol('©'));
}
```
## Unicode Properties
You can detect common properties of a code point with the `PropsData` module.
In your `build.zig`:
```zig
exe.root_module.addImport("PropsData", zg.module("PropsData"));
```
In your code:
```zig
const PropsData = @import("PropsData");
test "Properties" {
const pd = try PropsData.init(allocator);
defer pd.deinit();
// Mathematical symbols and letters.
try expect(pd.isMath('+'));
// Alphabetic only code points.
try expect(pd.isAlphabetic('Z'));
// Space, tab, and other separators.
try expect(pd.isWhitespace(' '));
// Hexadecimal digits and variations thereof.
try expect(pd.isHexDigit('f'));
try expect(!pd.isHexDigit('z'));
// Accents, dieresis, and other combining marks.
try expect(pd.isDiacritic('\u{301}'));
// Unicode has a specification for valid identifiers like
// the ones used in programming and regular expressions.
try expect(pd.isIdStart('Z')); // Identifier start character
try expect(!pd.isIdStart('1'));
try expect(pd.isIdContinue('1'));
// The `X` versions add some code points that can appear after
// normalizing a string.
try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character
try expect(pd.isXidContinue('\u{e33}'));
try expect(!pd.isXidStart('1'));
// Note surprising Unicode numeric type properties!
try expect(pd.isNumeric('\u{277f}'));
try expect(!pd.isNumeric('3')); // 3 is not numeric!
try expect(pd.isDigit('\u{2070}'));
try expect(!pd.isDigit('3')); // 3 is not a digit!
try expect(pd.isDecimal('3')); // 3 is a decimal digit
}
```
## Letter Case Detection and Conversion
To detect and convert to and from different letter cases, use the `CaseData`
module.
In your `build.zig`:
```zig
exe.root_module.addImport("CaseData", zg.module("CaseData"));
```
In your code:
```zig
const CaseData = @import("CaseData");
test "Case" {
const cd = try CaseData.init(allocator);
defer cd.deinit();
// Upper and lower case.
try expect(cd.isUpper('A'));
try expect('A' == cd.toUpper('a'));
try expect(cd.isLower('a'));
try expect('a' == cd.toLower('A'));
// Code points that have case.
try expect(cd.isCased('É'));
try expect(!cd.isCased('3'));
// Case detection and conversion for strings.
try expect(cd.isUpperStr("HELLO 123!"));
const ucased = try cd.toUpperStr(allocator, "hello 123");
defer allocator.free(ucased);
try expectEqualStrings("HELLO 123", ucased);
try expect(cd.isLowerStr("hello 123!"));
const lcased = try cd.toLowerStr(allocator, "HELLO 123");
defer allocator.free(lcased);
try expectEqualStrings("hello 123", lcased);
}
```
## Normalization
Unicode normalization is the process of converting a string into a uniform
representation that can guarantee a known structure by following a strict set
of rules. There are four normalization forms:
Canonical Composition (NFC)
: The most compact representation obtained by first
decomposing to Canonical Decomposition and then composing to NFC.
Compatibility Composition (NFKC)
: The most comprehensive composition obtained
by first decomposing to Compatibility Decomposition and then composing to NFKC.
Canonical Decomposition (NFD)
: Only code points with canonical decompositions
are decomposed. This is a more compact and faster decomposition but will not
provide the most comprehensive normalization possible.
Compatibility Decomposition (NFKD)
: The most comprehensive decomposition method
where both canonical and compatibility decompositions are performed recursively.
zg has methods to produce all four normalization forms in the `Normalize` module.
In your `build.zig`:
```zig
exe.root_module.addImport("Normalize", zg.module("Normalize"));
```
In your code:
```zig
const Normalize = @import("Normalize");
test "Normalization" {
// We need lots of Unicode data for normalization.
var norm_data: Normalize.NormData = undefined;
try Normalize.NormData.init(&norm_data, allocator);
defer norm_data.deinit();
// The `Normalize` structure takes a pointer to the data.
const n = Normalize{ .norm_data = &norm_data };
// NFC: Canonical composition
const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
defer nfc_result.deinit();
try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);
// NFKC: Compatibility composition
const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
defer nfkc_result.deinit();
try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);
// NFD: Canonical decomposition
const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}");
defer nfd_result.deinit();
try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);
// NFKD: Compatibility decomposition
const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
defer nfkd_result.deinit();
try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);
// Test for equality of two strings after normalizing to NFC.
try expect(try n.eql(allocator, "foé", "foe\u{0301}"));
try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
}
```
## Caseless Matching via Case Folding
Unicode provides a more efficient way of comparing strings while ignoring letter
case differences: case folding. When you case fold a string, it's converted into a
normalized case form suitable for efficient matching. Use the `CaseFold` module
for this.
In your `build.zig`:
```zig
exe.root_module.addImport("Normalize", zg.module("Normalize"));
exe.root_module.addImport("CaseFold", zg.module("CaseFold"));
```
In your code:
```zig
const Normalize = @import("Normalize");
const CaseFold = @import("CaseFold");
test "Caseless matching" {
// We need to normalize during the matching process.
var norm_data: Normalize.NormData = undefined;
try Normalize.NormData.init(&norm_data, allocator);
defer norm_data.deinit();
const n = Normalize{ .norm_data = &norm_data };
// We need Unicode case fold data.
const cfd = try CaseFold.FoldData.init(allocator);
defer cfd.deinit();
// The `CaseFold` structure takes a pointer to the data.
const cf = CaseFold{ .fold_data = &cfd };
// `compatCaselessMatch` provides the deepest level of caseless
// matching because it decomposes fully to NFKD.
const a = "Héllo World! \u{3d3}";
const b = "He\u{301}llo World! \u{3a5}\u{301}";
try expect(try cf.compatCaselessMatch(allocator, &n, a, b));
const c = "He\u{301}llo World! \u{3d2}\u{301}";
try expect(try cf.compatCaselessMatch(allocator, &n, a, c));
// `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch`
// because it only decomposes to NFD. Naturally, it's faster because of this.
try expect(!try cf.canonCaselessMatch(allocator, &n, a, b));
try expect(try cf.canonCaselessMatch(allocator, &n, a, c));
}
```
## Display Width of Characters and Strings
When displaying text with a fixed-width font on a terminal screen, it's very
important to know exactly how many columns or cells each character should take.
Most characters will use one column, but there are many, like emoji and
East-Asian ideographs, that need more space. The `DisplayWidth` module provides
methods for this purpose. It also has methods that use the display width calculation
to `center`, `padLeft`, `padRight`, and `wrap` text.
In your `build.zig`:
```zig
exe.root_module.addImport("DisplayWidth", zg.module("DisplayWidth"));
```
In your code:
```zig
const DisplayWidth = @import("DisplayWidth");
test "Display width" {
// We need Unicode data for display width calculation.
const dwd = try DisplayWidth.DisplayWidthData.init(allocator);
defer dwd.deinit();
// The `DisplayWidth` structure takes a pointer to the data.
const dw = DisplayWidth{ .data = &dwd };
// String display width
try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
// Centering text
const centered = try dw.center(allocator, "w😊w", 10, "-");
defer allocator.free(centered);
try expectEqualStrings("---w😊w---", centered);
// Pad left
const right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
defer allocator.free(right_aligned);
try expectEqualStrings("******abc", right_aligned);
// Pad right
const left_aligned = try dw.padRight(allocator, "abc", 9, "*");
defer allocator.free(left_aligned);
try expectEqualStrings("abc******", left_aligned);
// Wrap text
const input = "The quick brown fox\r\njumped over the lazy dog!";
const wrapped = try dw.wrap(allocator, input, 10, 3);
defer allocator.free(wrapped);
const want =
\\The quick
\\brown fox
\\jumped
\\over the
\\lazy dog!
;
try expectEqualStrings(want, wrapped);
}
```
## Scripts
Unicode categorizes code points by the Script in which they belong. A Script
collects letters and other symbols that belong to a particular writing system.
You can detect the Script for a code point with the `ScriptsData` module.
In your `build.zig`:
```zig
exe.root_module.addImport("ScriptsData", zg.module("ScriptsData"));
```
In your code:
```zig
const ScriptsData = @import("ScriptsData");
test "Scripts" {
const sd = try ScriptsData.init(allocator);
defer sd.deinit();
// To see the full list of Scripts, look at the
// `src/ScriptsData.zig` file. They are listed in an enum.
try expect(sd.script('A') == .Latin);
try expect(sd.script('Ω') == .Greek);
try expect(sd.script('צ') == .Hebrew);
}
```
## Relation to Ziglyph
zg is a total re-write of some of the components of Ziglyph. The idea was to
reduce binary size and improve performance. These goals were achieved by using
trie-like data structures (inspired by [Ghostty's implementation](https://mitchellh.com/writing/ghostty-devlog-006))
instead of generated functions. Where Ziglyph uses a function call, zg uses an
array lookup, which is considerably faster. In addition, all these data structures in
zg are loaded at runtime from compressed versions in the binary. This allows
for smaller binary sizes at the expense of increased memory
footprint at runtime.
Benchmarks demonstrate the above stated goals have been met:
```plain
Binary sizes =======
149K ziglyph_case
87K zg_case
275K ziglyph_caseless
168K zg_caseless
68K ziglyph_codepoint
68K zg_codepoint
101K ziglyph_grapheme
86K zg_grapheme
185K ziglyph_normalizer
152K zg_normalize
101K ziglyph_width
86K zg_width
Benchmarks ==========
Ziglyph toUpperStr/toLowerStr: result: 7911596, took: 80
Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17
zg toUpperStr/toLowerStr: result: 7911596, took: 62
zg isUpperStr/isLowerStr: result: 110959, took: 7
Ziglyph Normalizer.eqlCaseless: result: 625, took: 500
zg CaseFold.canonCaselessMatch: result: 625, took: 385
zg CaseFold.compatCaselessMatch: result: 625, took: 593
Ziglyph CodePointIterator: result: 3769314, took: 2
zg CodePointIterator: result: 3769314, took: 3
Ziglyph GraphemeIterator: result: 3691806, took: 48
zg GraphemeIterator: result: 3691806, took: 16
Ziglyph Normalizer.nfkc: result: 3934162, took: 416
zg Normalize.nfkc: result: 3934162, took: 182
Ziglyph Normalizer.nfc: result: 3955798, took: 57
zg Normalize.nfc: result: 3955798, took: 28
Ziglyph Normalizer.nfkd: result: 4006398, took: 172
zg Normalize.nfkd: result: 4006398, took: 104
Ziglyph Normalizer.nfd: result: 4028034, took: 169
zg Normalize.nfd: result: 4028034, took: 104
Ziglyph Normalizer.eql: result: 625, took: 337
Zg Normalize.eql: result: 625, took: 53
Ziglyph display_width.strWidth: result: 3700914, took: 71
zg DisplayWidth.strWidth: result: 3700914, took: 24
```
These results were obtained on an M1 Mac with 16 GiB of RAM.
In contrast to Ziglyph, zg does not have:
- Word segmentation
- Sentence segmentation
- Collation
It's possible that any missing functionality will be added in future versions,
but only if enough demand is present in the community.

1
deps/zg/UNICODE_VERSION.txt vendored Normal file
View File

@@ -0,0 +1 @@
This software is compatible with Unicode version 15.1.0

337
deps/zg/build.zig vendored Normal file
View File

@@ -0,0 +1,337 @@
const std = @import("std");

/// Compiles one codegen tool from `src_path`, registers a run step for it,
/// and returns the lazy path of the deflate-compressed data file (`out_name`)
/// the tool will produce. Pass `options` to expose a build-time "options"
/// module to the tool (used by the display-width generator), or `null`.
///
/// Every generator in this build follows the exact same pattern, so this
/// helper replaces fifteen copies of the same five lines.
fn addCodegen(
    b: *std.Build,
    name: []const u8,
    src_path: []const u8,
    out_name: []const u8,
    options: ?*std.Build.Step.Options,
) std.Build.LazyPath {
    const exe = b.addExecutable(.{
        .name = name,
        .root_source_file = b.path(src_path),
        // Codegen tools run on the build host during the build itself,
        // so they target the host rather than the user's chosen target.
        .target = b.host,
        .optimize = .Debug,
    });
    if (options) |opts| exe.root_module.addOptions("options", opts);
    const run_exe = b.addRunArtifact(exe);
    return run_exe.addOutputFileArg(out_name);
}

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Code generation: each tool parses Unicode data files under
    // data/unicode/ and emits a compressed binary table that the runtime
    // modules below decompress and load.

    // Grapheme break properties
    const gbp_gen_out = addCodegen(b, "gbp", "codegen/gbp.zig", "gbp.bin.z", null);

    // Display width. The `cjk` option controls whether East Asian
    // "Ambiguous" code points are rendered wide (2 columns).
    const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2).") orelse false;
    const options = b.addOptions();
    options.addOption(bool, "cjk", cjk);
    const dwp_gen_out = addCodegen(b, "dwp", "codegen/dwp.zig", "dwp.bin.z", options);

    // Normalization properties
    const canon_gen_out = addCodegen(b, "canon", "codegen/canon.zig", "canon.bin.z", null);
    const compat_gen_out = addCodegen(b, "compat", "codegen/compat.zig", "compat.bin.z", null);
    const hangul_gen_out = addCodegen(b, "hangul", "codegen/hangul.zig", "hangul.bin.z", null);
    const normp_gen_out = addCodegen(b, "normp", "codegen/normp.zig", "normp.bin.z", null);
    const ccc_gen_out = addCodegen(b, "ccc", "codegen/ccc.zig", "ccc.bin.z", null);

    // General Categories
    const gencat_gen_out = addCodegen(b, "gencat", "codegen/gencat.zig", "gencat.bin.z", null);

    // Case folding
    const fold_gen_out = addCodegen(b, "fold", "codegen/fold.zig", "fold.bin.z", null);

    // Numeric types
    const num_gen_out = addCodegen(b, "numeric", "codegen/numeric.zig", "numeric.bin.z", null);

    // Letter case properties and mappings
    const case_prop_gen_out = addCodegen(b, "case_prop", "codegen/case_prop.zig", "case_prop.bin.z", null);
    const upper_gen_out = addCodegen(b, "upper", "codegen/upper.zig", "upper.bin.z", null);
    const lower_gen_out = addCodegen(b, "lower", "codegen/lower.zig", "lower.bin.z", null);

    // Scripts
    const scripts_gen_out = addCodegen(b, "scripts", "codegen/scripts.zig", "scripts.bin.z", null);

    // Core and extra properties
    const core_gen_out = addCodegen(b, "core", "codegen/core_props.zig", "core_props.bin.z", null);
    const props_gen_out = addCodegen(b, "props", "codegen/props.zig", "props.bin.z", null);

    // Modules we provide. Public modules use `addModule`; internal data
    // modules (shared between public ones) use `createModule`.

    // Code points
    const code_point = b.addModule("code_point", .{
        .root_source_file = b.path("src/code_point.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Grapheme clusters
    const grapheme_data = b.createModule(.{
        .root_source_file = b.path("src/GraphemeData.zig"),
        .target = target,
        .optimize = optimize,
    });
    grapheme_data.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out });
    const grapheme = b.addModule("grapheme", .{
        .root_source_file = b.path("src/grapheme.zig"),
        .target = target,
        .optimize = optimize,
    });
    grapheme.addImport("code_point", code_point);
    grapheme.addImport("GraphemeData", grapheme_data);

    // ASCII utilities
    const ascii = b.addModule("ascii", .{
        .root_source_file = b.path("src/ascii.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Fixed pitch font display width
    const width_data = b.createModule(.{
        .root_source_file = b.path("src/WidthData.zig"),
        .target = target,
        .optimize = optimize,
    });
    width_data.addAnonymousImport("dwp", .{ .root_source_file = dwp_gen_out });
    width_data.addImport("GraphemeData", grapheme_data);
    const display_width = b.addModule("DisplayWidth", .{
        .root_source_file = b.path("src/DisplayWidth.zig"),
        .target = target,
        .optimize = optimize,
    });
    display_width.addImport("ascii", ascii);
    display_width.addImport("code_point", code_point);
    display_width.addImport("grapheme", grapheme);
    display_width.addImport("DisplayWidthData", width_data);

    // Normalization
    const ccc_data = b.createModule(.{
        .root_source_file = b.path("src/CombiningData.zig"),
        .target = target,
        .optimize = optimize,
    });
    ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out });
    const canon_data = b.createModule(.{
        .root_source_file = b.path("src/CanonData.zig"),
        .target = target,
        .optimize = optimize,
    });
    canon_data.addAnonymousImport("canon", .{ .root_source_file = canon_gen_out });
    const compat_data = b.createModule(.{
        .root_source_file = b.path("src/CompatData.zig"),
        .target = target,
        .optimize = optimize,
    });
    compat_data.addAnonymousImport("compat", .{ .root_source_file = compat_gen_out });
    const hangul_data = b.createModule(.{
        .root_source_file = b.path("src/HangulData.zig"),
        .target = target,
        .optimize = optimize,
    });
    hangul_data.addAnonymousImport("hangul", .{ .root_source_file = hangul_gen_out });
    const normp_data = b.createModule(.{
        .root_source_file = b.path("src/NormPropsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    normp_data.addAnonymousImport("normp", .{ .root_source_file = normp_gen_out });
    const norm_data = b.createModule(.{
        .root_source_file = b.path("src/NormData.zig"),
        .target = target,
        .optimize = optimize,
    });
    norm_data.addImport("CanonData", canon_data);
    norm_data.addImport("CombiningData", ccc_data);
    norm_data.addImport("CompatData", compat_data);
    norm_data.addImport("HangulData", hangul_data);
    norm_data.addImport("NormPropsData", normp_data);
    const norm = b.addModule("Normalize", .{
        .root_source_file = b.path("src/Normalize.zig"),
        .target = target,
        .optimize = optimize,
    });
    norm.addImport("ascii", ascii);
    norm.addImport("code_point", code_point);
    norm.addImport("NormData", norm_data);

    // General Category
    const gencat_data = b.addModule("GenCatData", .{
        .root_source_file = b.path("src/GenCatData.zig"),
        .target = target,
        .optimize = optimize,
    });
    gencat_data.addAnonymousImport("gencat", .{ .root_source_file = gencat_gen_out });

    // Case folding
    const fold_data = b.createModule(.{
        .root_source_file = b.path("src/FoldData.zig"),
        .target = target,
        .optimize = optimize,
    });
    fold_data.addAnonymousImport("fold", .{ .root_source_file = fold_gen_out });
    const case_fold = b.addModule("CaseFold", .{
        .root_source_file = b.path("src/CaseFold.zig"),
        .target = target,
        .optimize = optimize,
    });
    case_fold.addImport("ascii", ascii);
    case_fold.addImport("FoldData", fold_data);
    case_fold.addImport("Normalize", norm);

    // Letter case
    const case_data = b.addModule("CaseData", .{
        .root_source_file = b.path("src/CaseData.zig"),
        .target = target,
        .optimize = optimize,
    });
    case_data.addImport("code_point", code_point);
    case_data.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out });
    case_data.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out });
    case_data.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out });

    // Scripts
    const scripts_data = b.addModule("ScriptsData", .{
        .root_source_file = b.path("src/ScriptsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out });

    // Properties
    const props_data = b.addModule("PropsData", .{
        .root_source_file = b.path("src/PropsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    props_data.addAnonymousImport("core_props", .{ .root_source_file = core_gen_out });
    props_data.addAnonymousImport("props", .{ .root_source_file = props_gen_out });
    props_data.addAnonymousImport("numeric", .{ .root_source_file = num_gen_out });

    // Unicode Tests: conformance tests driven by the official Unicode
    // test-data files, exposed as the `unicode-test` build step.
    const unicode_tests = b.addTest(.{
        .root_source_file = b.path("src/unicode_tests.zig"),
        .target = target,
        .optimize = optimize,
    });
    unicode_tests.root_module.addImport("grapheme", grapheme);
    unicode_tests.root_module.addImport("Normalize", norm);
    const run_unicode_tests = b.addRunArtifact(unicode_tests);
    const unicode_test_step = b.step("unicode-test", "Run Unicode tests");
    unicode_test_step.dependOn(&run_unicode_tests.step);
}

17
deps/zg/build.zig.zon vendored Normal file
View File

@@ -0,0 +1,17 @@
.{
    .name = "zg",
    // NOTE(review): version here is 0.13.1 while the README's fetch URL
    // references v0.13.2 — confirm which is current.
    .version = "0.13.1",
    .minimum_zig_version = "0.13.0",
    // Files/directories included when the package is fetched.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "codegen",
        "data",
        "LICENSE",
        "README.md",
        "src",
        "unicode_license",
        "UNICODE_VERSION.txt",
    },
}

67
deps/zg/codegen/canon.zig vendored Normal file
View File

@@ -0,0 +1,67 @@
const std = @import("std");
const builtin = @import("builtin");

/// Parses data/unicode/UnicodeData.txt and writes the canonical
/// decomposition table to the output path given as the first CLI argument,
/// as a raw-deflate-compressed stream.
///
/// Record format (native endian): one u8 `len` (2 or 3) followed by `len`
/// u24 code points — the source code point, then its 1- or 2-code-point
/// canonical decomposition. A trailing u16 zero terminates the stream.
pub fn main() !void {
    // Arena allocator: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    // First CLI argument is the output file path (supplied by build.zig).
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;
    // UnicodeData.txt: one code point per line, fields separated by ';'.
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] = source code point; cps[1..len] = its decomposition.
        var cps: [3]u24 = undefined;
        var len: u8 = 2;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                // Field 0: the code point itself (hex).
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),
                // Field 5: Decomposition_Type/Decomposition_Mapping.
                5 => {
                    // Not canonical: empty, or tagged like "<compat>".
                    if (field.len == 0 or field[0] == '<') continue :lines;
                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical pair decomposition.
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton decomposition.
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                },
                // NOTE(review): this inspects line[0] (always a hex digit at
                // field index 2), so the branch can never fire; was field[0]
                // (e.g. names like "<control>") intended? Confirm upstream.
                2 => if (line[0] == '<') continue :lines,
                else => {},
            }
        }
        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }
    // u16 zero sentinel marks end-of-stream for the reader.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}

135
deps/zg/codegen/case_prop.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

// Per-code-point property bytes are grouped into fixed-size blocks; identical
// blocks are deduplicated when building the two-stage lookup table below.
const block_size = 256;
const Block = [block_size]u8;

// Maps a block of property bytes to its offset in the stage-2 array.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            // DeepRecursive hashes the full array contents, not the pointer.
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Parses data/unicode/DerivedCoreProperties.txt, extracts the Lowercase,
/// Uppercase, and Cased properties as a per-code-point bit mask, compresses
/// them into a deduplicated two-stage lookup table, and writes the table
/// raw-deflate-compressed to the output path given as the first CLI argument.
///
/// Output format (native endian): u16 stage-1 length, stage-1 u16 entries
/// (block offsets into stage 2), u16 stage-2 length, stage-2 raw bytes.
pub fn main() !void {
    // Arena allocator: everything is freed at once on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> property bit mask (1 = Lowercase, 2 = Uppercase, 4 = Cased).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip trailing "# ..." comments before tokenizing.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] range of code points on this line.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either "XXXX..YYYY" or a single "XXXX".
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Property name; OR the matching bit into every code
                    // point in the range (a code point may appear on several
                    // lines, once per property).
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "Lowercase")) bit = 1;
                    if (mem.eql(u8, field, "Uppercase")) bit = 2;
                    if (mem.eql(u8, field, "Cased")) bit = 4;
                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the two-stage table: stage 1 maps block index -> stage-2 offset;
    // stage 2 stores the deduplicated 256-byte blocks of property masks.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = prop;
        block_len += 1;
        // Flush on a full block, or at the last code point (final partial block).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            // First time we see this block: append it to stage 2.
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // First CLI argument is the output file path (supplied by build.zig).
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

124
deps/zg/codegen/ccc.zig vendored Normal file
View File

@@ -0,0 +1,124 @@
// Generates a two-stage lookup table for Unicode canonical combining
// classes from DerivedCombiningClass.txt and writes it as a
// raw-deflate-compressed binary file.
const std = @import("std");
const builtin = @import("builtin");
// Code points are grouped into fixed-size blocks; identical blocks are
// deduplicated through a hash map keyed by the block's contents.
const block_size = 256;
const Block = [block_size]u8;
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Maps code point -> canonical combining class (ccc 0 is the implicit
    // default and is never stored).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCombiningClass.txt
    var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{});
    defer cc_file.close();
    var cc_buf = std.io.bufferedReader(cc_file.reader());
    const cc_reader = cc_buf.reader();
    while (try cc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment before tokenizing the fields.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range parsed from field 0.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Combining Class
                    if (std.mem.eql(u8, field, "0")) continue;
                    const cc = try std.fmt.parseInt(u8, field, 10);
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), cc);
                },
                else => {},
            }
        }
    }
    // Build the two-stage table: stage1 maps block number -> offset into
    // stage2; stage2 holds the deduplicated per-code-point bytes.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const cc = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = cc;
        block_len += 1;
        if (block_len < block_size and cp != 0x10ffff) continue;
        // Block is full (or we reached the last code point): dedupe it.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // Output file path is the first CLI argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness is baked into the data file; the
    // consumer must share the producer's byte order.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

64
deps/zg/codegen/compat.zig vendored Normal file
View File

@@ -0,0 +1,64 @@
// Extracts compatibility decompositions from UnicodeData.txt and writes
// them as a raw-deflate-compressed stream of records, each being a u8
// length followed by that many u24 code points (the first is the source
// code point). The stream is terminated by a u16 zero.
const std = @import("std");
const builtin = @import("builtin");
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Input: the canonical UnicodeData.txt table.
    var src_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer src_file.close();
    var src_buf = std.io.bufferedReader(src_file.reader());
    const src_reader = src_buf.reader();
    // Output file path is the first CLI argument.
    var cli_args = try std.process.argsWithAllocator(allocator);
    defer cli_args.deinit();
    _ = cli_args.skip();
    const out_path = cli_args.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var dest_file = try std.fs.cwd().createFile(out_path, .{});
    defer dest_file.close();
    var dest_comp = try compressor(.raw, dest_file.writer(), .{ .level = .best });
    const writer = dest_comp.writer();
    // NOTE(review): native endianness is baked into the data file; the
    // consumer must share the producer's byte order.
    const endian = builtin.cpu.arch.endian();
    var buf: [4096]u8 = undefined;
    lines: while (try src_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        if (line.len == 0) continue;
        var fields = std.mem.splitScalar(u8, line, ';');
        // cps[0] is the source code point; the decomposition follows.
        var cps: [19]u24 = undefined;
        var cps_len: u8 = 1;
        var idx: usize = 0;
        while (fields.next()) |field| : (idx += 1) {
            switch (idx) {
                // Field 0: the code point itself.
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),
                // Preserved from the original: `line[0]` is always a hex
                // digit here, so this branch never fires — kept verbatim.
                2 => if (line[0] == '<') continue :lines,
                // Field 5: decomposition mapping; only compatibility
                // mappings (those starting with a "<type>" tag) are kept.
                5 => {
                    // Not compatibility.
                    if (field.len == 0 or field[0] != '<') continue :lines;
                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // skip the "<compat type>" tag
                    while (cp_iter.next()) |cp_str| : (cps_len += 1) {
                        cps[cps_len] = try std.fmt.parseInt(u24, cp_str, 16);
                    }
                },
                else => {},
            }
        }
        // Emit one record: length byte, then the code points.
        try writer.writeInt(u8, @intCast(cps_len), endian);
        for (cps[0..cps_len]) |cp| try writer.writeInt(u24, cp, endian);
    }
    // Terminator so the reader knows the stream is done.
    try writer.writeInt(u16, 0, endian);
    try dest_comp.flush();
}

138
deps/zg/codegen/core_props.zig vendored Normal file
View File

@@ -0,0 +1,138 @@
// Generates a two-stage lookup table for Unicode derived core properties
// (Math / Alphabetic / ID_Start / ID_Continue / XID_Start / XID_Continue)
// from DerivedCoreProperties.txt and writes it as a raw-deflate-compressed
// binary file.
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// Code points are grouped into fixed-size blocks; identical blocks are
// deduplicated through a hash map keyed by the block's contents.
const block_size = 256;
const Block = [block_size]u8;
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Maps code point -> property bit set (1=Math, 2=Alphabetic, 4=ID_Start,
    // 8=ID_Continue, 16=XID_Start, 32=XID_Continue).
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment before tokenizing the fields.
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range parsed from field 0.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "Math")) bit = 1;
                    if (mem.eql(u8, field, "Alphabetic")) bit = 2;
                    if (mem.eql(u8, field, "ID_Start")) bit = 4;
                    if (mem.eql(u8, field, "ID_Continue")) bit = 8;
                    if (mem.eql(u8, field, "XID_Start")) bit = 16;
                    if (mem.eql(u8, field, "XID_Continue")) bit = 32;
                    if (bit != 0) {
                        // OR the bit into every code point of the range; a
                        // code point may appear on several property lines.
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the two-stage table: stage1 maps block number -> offset into
    // stage2; stage2 holds the deduplicated per-code-point bytes.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = prop;
        block_len += 1;
        if (block_len < block_size and cp != 0x10ffff) continue;
        // Block is full (or we reached the last code point): dedupe it.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // Output file path is the first CLI argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness is baked into the data file; the
    // consumer must share the producer's byte order.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

244
deps/zg/codegen/dwp.zig vendored Normal file
View File

@@ -0,0 +1,244 @@
// Generates a two-stage display-width table (values -1..3 stored as i3)
// from DerivedEastAsianWidth.txt and DerivedGeneralCategory.txt, with a
// set of hard-coded overrides, and writes it as a raw-deflate-compressed
// binary file. The build option `options.cjk` widens the East Asian
// "Ambiguous" class to width 2.
const std = @import("std");
const builtin = @import("builtin");
const options = @import("options");
// Code points are grouped into fixed-size blocks; identical blocks are
// deduplicated through a hash map keyed by the block's contents.
const block_size = 256;
const Block = [block_size]i3;
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(i3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Maps code point -> display width; unset entries default to 1 later.
    var flat_map = std.AutoHashMap(u21, i3).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedEastAsianWidth.txt
    var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{});
    defer deaw_file.close();
    var deaw_buf = std.io.bufferedReader(deaw_file.reader());
    const deaw_reader = deaw_buf.reader();
    while (try deaw_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        // @missing ranges
        if (std.mem.startsWith(u8, line, "# @missing: ")) {
            const semi = std.mem.indexOfScalar(u8, line, ';').?;
            const field = line[12..semi];
            const dots = std.mem.indexOf(u8, field, "..").?;
            const from = try std.fmt.parseInt(u21, field[0..dots], 16);
            const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
            // Skip the whole-plane default; narrower @missing ranges get
            // width 2.
            if (from == 0 and to == 0x10ffff) continue;
            for (from..to + 1) |cp| try flat_map.put(@intCast(cp), 2);
            continue;
        }
        if (line[0] == '#') continue;
        // Strip any trailing comment before tokenizing the fields.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range parsed from field 0.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Width
                    // Wide (W), Fullwidth (F), and — when the cjk build
                    // option is set — Ambiguous (A) all get width 2.
                    if (std.mem.eql(u8, field, "W") or
                        std.mem.eql(u8, field, "F") or
                        (options.cjk and std.mem.eql(u8, field, "A")))
                    {
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 2);
                    }
                },
                else => {},
            }
        }
    }
    // Process DerivedGeneralCategory.txt
    var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer dgc_file.close();
    var dgc_buf = std.io.bufferedReader(dgc_file.reader());
    const dgc_reader = dgc_buf.reader();
    while (try dgc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    // Marks and (non-Arabic) format characters occupy no
                    // columns, overriding any width set above.
                    if (std.mem.eql(u8, field, "Mn")) {
                        // Nonspacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Me")) {
                        // Enclosing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Mc")) {
                        // Spacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Cf")) {
                        // NOTE(review): the ARABIC filter matches the
                        // comment text of the line, not a parsed field.
                        if (std.mem.indexOf(u8, line, "ARABIC") == null) {
                            // Format except Arabic
                            for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the two-stage table: stage1 maps block number -> offset into
    // stage2; stage2 holds the deduplicated per-code-point widths.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(i3).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]i3{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        var width = flat_map.get(cp) orelse 1;
        // Specific overrides
        switch (cp) {
            // Three-em dash
            0x2e3b => width = 3,
            // C0/C1 control codes
            0...0x20,
            0x80...0xa0,
            // Line separator
            0x2028,
            // Paragraph separator
            0x2029,
            // Hangul syllable and ignorable.
            0x1160...0x11ff,
            0xd7b0...0xd7ff,
            0x2060...0x206f,
            0xfff0...0xfff8,
            0xe0000...0xE0fff,
            => width = 0,
            // Two-em dash
            0x2e3a,
            // Regional indicators
            // NOTE(review): the Regional Indicator block ends at 0x1f1ff;
            // this range extends through 0x1f200 — confirm intent upstream.
            0x1f1e6...0x1f200,
            // CJK Blocks
            0x3400...0x4dbf, // CJK Unified Ideographs Extension A
            0x4e00...0x9fff, // CJK Unified Ideographs
            0xf900...0xfaff, // CJK Compatibility Ideographs
            0x20000...0x2fffd, // Plane 2
            0x30000...0x3fffd, // Plane 3
            => width = 2,
            else => {},
        }
        // ASCII
        if (0x20 <= cp and cp < 0x7f) width = 1;
        // Soft hyphen
        if (cp == 0xad) width = 1;
        // Backspace and delete
        if (cp == 0x8 or cp == 0x7f) width = -1;
        // Process block
        block[block_len] = width;
        block_len += 1;
        if (block_len < block_size and cp != 0x10ffff) continue;
        // Block is full (or we reached the last code point): dedupe it.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // Output file path is the first CLI argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness is baked into the data file; the
    // consumer must share the producer's byte order.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    // i3 widths are widened to i8 for serialization.
    for (stage2.items) |i| try writer.writeInt(i8, i, endian);
    try out_comp.flush();
}

252
deps/zg/codegen/fold.zig vendored Normal file
View File

@@ -0,0 +1,252 @@
// Generates the case-folding tables from CaseFolding.txt and
// DerivedCoreProperties.txt: a three-stage lookup (stage1 block index ->
// stage2 per-code-point data index -> stage3 offsets / multi-code-point
// mappings) plus the list of code points whose fold mapping disagrees
// with the Changes_When_Casefolded property. Output is a
// raw-deflate-compressed binary file.
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();
    // Process DerivedCoreProperties.txt
    var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer props_file.close();
    var props_buf = std.io.bufferedReader(props_file.reader());
    const props_reader = props_buf.reader();
    // Set of code points carrying Changes_When_Casefolded.
    var props_map = std.AutoHashMap(u21, void).init(allocator);
    defer props_map.deinit();
    var line_buf: [4096]u8 = undefined;
    props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Strip any trailing comment before tokenizing the fields.
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range parsed from field 0.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines;
                    for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {});
                },
                else => {},
            }
        }
    }
    // Maps code point -> up to 3 fold code points (zero-padded).
    var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
    defer codepoint_mapping.deinit();
    // Process CaseFolding.txt
    var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
    defer cp_file.close();
    var cp_buf = std.io.bufferedReader(cp_file.reader());
    const cp_reader = cp_buf.reader();
    while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        var field_it = std.mem.splitScalar(u8, line, ';');
        const codepoint_str = field_it.first();
        const codepoint = try std.fmt.parseUnsigned(u21, codepoint_str, 16);
        const status = std.mem.trim(u8, field_it.next() orelse continue, " ");
        // Only interested in 'common' and 'full'
        if (status[0] != 'C' and status[0] != 'F') continue;
        const mapping = std.mem.trim(u8, field_it.next() orelse continue, " ");
        var mapping_it = std.mem.splitScalar(u8, mapping, ' ');
        // Full case folds have at most 3 code points.
        var mapping_buf = [_]u21{0} ** 3;
        var mapping_i: u8 = 0;
        while (mapping_it.next()) |mapping_c| {
            mapping_buf[mapping_i] = try std.fmt.parseInt(u21, mapping_c, 16);
            mapping_i += 1;
        }
        try codepoint_mapping.putNoClobber(codepoint, mapping_buf);
    }
    var changes_when_casefolded_exceptions = std.ArrayList(u21).init(allocator);
    defer changes_when_casefolded_exceptions.deinit();
    {
        // Codepoints with a case fold mapping can be missing the Changes_When_Casefolded property,
        // but not vice versa.
        for (codepoint_mapping.keys()) |codepoint| {
            if (props_map.get(codepoint) == null) {
                try changes_when_casefolded_exceptions.append(codepoint);
            }
        }
    }
    // Single-code-point folds are stored as signed offsets from the source
    // code point; deduplicate the distinct offsets and index them.
    var offset_to_index = std.AutoHashMap(i32, u8).init(allocator);
    defer offset_to_index.deinit();
    // Maps offset -> occurrence count, used to sort by frequency.
    var unique_offsets = std.AutoArrayHashMap(i32, u32).init(allocator);
    defer unique_offsets.deinit();
    // First pass
    {
        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len == 1) {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const result = try unique_offsets.getOrPut(offset);
                if (!result.found_existing) result.value_ptr.* = 0;
                result.value_ptr.* += 1;
            }
        }
        // A codepoint mapping to itself (offset=0) is the most common case
        try unique_offsets.put(0, 0x10FFFF);
        // Sort offsets by descending frequency so common offsets get small
        // indices.
        const C = struct {
            vals: []u32,
            pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
                return ctx.vals[a_index] > ctx.vals[b_index];
            }
        };
        unique_offsets.sort(C{ .vals = unique_offsets.values() });
        var offset_it = unique_offsets.iterator();
        var offset_index: u7 = 0;
        while (offset_it.next()) |entry| {
            try offset_to_index.put(entry.key_ptr.*, offset_index);
            offset_index += 1;
        }
    }
    // Multi-code-point folds get indices with the high bit (0x80) set.
    var mappings_to_index = std.AutoArrayHashMap([3]u21, u8).init(allocator);
    defer mappings_to_index.deinit();
    var codepoint_to_index = std.AutoHashMap(u21, u8).init(allocator);
    defer codepoint_to_index.deinit();
    // Second pass
    {
        var count_multiple_codepoints: u8 = 0;
        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len > 1) {
                const result = try mappings_to_index.getOrPut(entry.value_ptr.*);
                if (!result.found_existing) {
                    // 0x80 tag distinguishes multi-mapping indices from
                    // single-offset indices.
                    result.value_ptr.* = 0x80 | count_multiple_codepoints;
                    count_multiple_codepoints += 1;
                }
                const index = result.value_ptr.*;
                try codepoint_to_index.put(codepoint, index);
            } else {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const index = offset_to_index.get(offset).?;
                try codepoint_to_index.put(codepoint, index);
            }
        }
    }
    // Build the stage1/stage2/stage3 arrays and output them
    {
        const Block = [256]u8;
        var stage2_blocks = std.AutoArrayHashMap(Block, void).init(allocator);
        defer stage2_blocks.deinit();
        // Ensure the all-zero block sits at index 0 so trailing blocks can
        // be trimmed from stage1.
        const empty_block: Block = [_]u8{0} ** 256;
        try stage2_blocks.put(empty_block, {});
        const stage1_len = (0x10FFFF / 256) + 1;
        var stage1: [stage1_len]u8 = undefined;
        var codepoint: u21 = 0;
        var block: Block = undefined;
        while (codepoint <= 0x10FFFF) {
            const data_index = codepoint_to_index.get(codepoint) orelse 0;
            block[codepoint % 256] = data_index;
            codepoint += 1;
            if (codepoint % 256 == 0) {
                const result = try stage2_blocks.getOrPut(block);
                const index = result.index;
                // codepoint was just incremented past the block, hence -1.
                stage1[(codepoint >> 8) - 1] = @intCast(index);
            }
        }
        // Trim trailing zero entries (all-empty planes) from stage1.
        const last_meaningful_block = std.mem.lastIndexOfNone(u8, &stage1, "\x00").?;
        const meaningful_stage1 = stage1[0 .. last_meaningful_block + 1];
        const codepoint_cutoff = (last_meaningful_block + 1) << 8;
        const multiple_codepoint_start: usize = unique_offsets.count();
        var index: usize = 0;
        // Stage 3: the offsets first, then the multi-code-point triples.
        const stage3_elems = unique_offsets.count() + mappings_to_index.count() * 3;
        var stage3 = try allocator.alloc(i24, stage3_elems);
        defer allocator.free(stage3);
        for (unique_offsets.keys()) |key| {
            stage3[index] = @intCast(key);
            index += 1;
        }
        for (mappings_to_index.keys()) |key| {
            stage3[index] = @intCast(key[0]);
            stage3[index + 1] = @intCast(key[1]);
            stage3[index + 2] = @intCast(key[2]);
            index += 3;
        }
        // Flatten the deduplicated blocks into stage2.
        const stage2_elems = stage2_blocks.count() * 256;
        var stage2 = try allocator.alloc(u8, stage2_elems);
        defer allocator.free(stage2);
        for (stage2_blocks.keys(), 0..) |key, i| {
            @memcpy(stage2[i * 256 ..][0..256], &key);
        }
        // Write out compressed binary data file.
        var args_iter = try std.process.argsWithAllocator(allocator);
        defer args_iter.deinit();
        _ = args_iter.skip();
        const output_path = args_iter.next() orelse @panic("No output file arg!");
        const compressor = std.compress.flate.deflate.compressor;
        var out_file = try std.fs.cwd().createFile(output_path, .{});
        defer out_file.close();
        var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
        const writer = out_comp.writer();
        // NOTE(review): native endianness is baked into the data file; the
        // consumer must share the producer's byte order.
        const endian = builtin.cpu.arch.endian();
        // Table metadata.
        try writer.writeInt(u24, @intCast(codepoint_cutoff), endian);
        try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian);
        // Stage 1
        try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian);
        try writer.writeAll(meaningful_stage1);
        // Stage 2
        try writer.writeInt(u16, @intCast(stage2.len), endian);
        try writer.writeAll(stage2);
        // Stage 3
        try writer.writeInt(u16, @intCast(stage3.len), endian);
        for (stage3) |offset| try writer.writeInt(i24, offset, endian);
        // Changes when case folded
        // Min and max
        try writer.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
        for (changes_when_casefolded_exceptions.items) |cp| try writer.writeInt(u24, cp, endian);
        try out_comp.flush();
    }
}

248
deps/zg/codegen/gbp.zig vendored Normal file
View File

@@ -0,0 +1,248 @@
// Generates the grapheme-break property tables from
// DerivedCoreProperties.txt (InCB), GraphemeBreakProperty.txt, and
// emoji-data.txt (Extended_Pictographic). The three properties are packed
// into one byte per code point (GBP in the high nibble, InCB in bits 1-3,
// Extended_Pictographic in bit 0) and emitted as a three-stage
// raw-deflate-compressed table.
const std = @import("std");
const builtin = @import("builtin");
// Indic_Conjunct_Break (InCB) values; order fixes the numeric encoding.
const Indic = enum {
    none,
    Consonant,
    Extend,
    Linker,
};
// Grapheme_Cluster_Break values; order fixes the numeric encoding.
const Gbp = enum {
    none,
    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};
// Code points are grouped into fixed-size blocks; identical blocks are
// deduplicated through a hash map keyed by the block's contents.
const block_size = 256;
const Block = [block_size]u16;
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u16, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    var indic_map = std.AutoHashMap(u21, Indic).init(allocator);
    defer indic_map.deinit();
    var gbp_map = std.AutoHashMap(u21, Gbp).init(allocator);
    defer gbp_map.deinit();
    var emoji_set = std.AutoHashMap(u21, void).init(allocator);
    defer emoji_set.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process Indic
    var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer indic_file.close();
    var indic_buf = std.io.bufferedReader(indic_file.reader());
    const indic_reader = indic_buf.reader();
    while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Only InCB lines are relevant in this file.
        if (std.mem.indexOf(u8, line, "InCB") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        // Inclusive [start, end] code point range parsed from field 0.
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                2 => {
                    // Prop
                    // NOTE(review): "InvalidPorp" is a typo for
                    // "InvalidProp"; the error name is observable behavior,
                    // so it is left unchanged here.
                    const prop = std.meta.stringToEnum(Indic, field) orelse return error.InvalidPorp;
                    for (current_code[0]..current_code[1] + 1) |cp| try indic_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }
    // Process GBP
    var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
    defer gbp_file.close();
    var gbp_buf = std.io.bufferedReader(gbp_file.reader());
    const gbp_reader = gbp_buf.reader();
    while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Prop
                    const prop = std.meta.stringToEnum(Gbp, field) orelse return error.InvalidPorp;
                    for (current_code[0]..current_code[1] + 1) |cp| try gbp_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }
    // Process Emoji
    var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
    defer emoji_file.close();
    var emoji_buf = std.io.bufferedReader(emoji_file.reader());
    const emoji_reader = emoji_buf.reader();
    while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        // Only the Extended_Pictographic property is needed.
        if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        const from = try std.fmt.parseInt(u21, field[0..dots], 16);
                        const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
                        for (from..to + 1) |cp| try emoji_set.put(@intCast(cp), {});
                    } else {
                        const cp = try std.fmt.parseInt(u21, field, 16);
                        try emoji_set.put(@intCast(cp), {});
                    }
                },
                else => {},
            }
        }
    }
    // Build the three-stage table: stage1 -> stage2 -> stage3 (the distinct
    // packed property bytes).
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u16).init(allocator);
    defer stage2.deinit();
    var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
    defer stage3.deinit();
    var stage3_len: u16 = 0;
    var block: Block = [_]u16{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const gbp_prop: u8 = @intFromEnum(gbp_map.get(cp) orelse .none);
        const indic_prop: u8 = @intFromEnum(indic_map.get(cp) orelse .none);
        const emoji_prop: u1 = @intFromBool(emoji_set.contains(cp));
        // Pack: GBP in bits 4-7, InCB in bits 1-3, emoji flag in bit 0.
        var props_byte: u8 = gbp_prop << 4;
        props_byte |= indic_prop << 1;
        props_byte |= emoji_prop;
        // Deduplicate the packed byte; stage2 stores its stage3 index.
        const stage3_idx = blk: {
            const gop = try stage3.getOrPut(props_byte);
            if (!gop.found_existing) {
                gop.value_ptr.* = stage3_len;
                stage3_len += 1;
            }
            break :blk gop.value_ptr.*;
        };
        block[block_len] = stage3_idx;
        block_len += 1;
        if (block_len < block_size and cp != 0x10ffff) continue;
        // Block is full (or we reached the last code point): dedupe it.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // Output file path is the first CLI argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness is baked into the data file; the
    // consumer must share the producer's byte order.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u16, i, endian);
    // Stage 3 is written as raw bytes in insertion (index) order.
    const props_bytes = stage3.keys();
    try writer.writeInt(u16, @intCast(props_bytes.len), endian);
    try writer.writeAll(props_bytes);
    try out_comp.flush();
}

171
deps/zg/codegen/gencat.zig vendored Normal file
View File

@@ -0,0 +1,171 @@
const std = @import("std");
const builtin = @import("builtin");
/// Unicode General_Category (gc) values. The declaration order fixes the
/// numeric value produced by @intFromEnum (0-29, which fits in a u5) —
/// that ordinal is what the generated table stores, so the decoder must
/// use the exact same order.
const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};
// Code points are grouped into fixed-size blocks for the multi-stage table.
const block_size = 256;
const Block = [block_size]u5;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u5, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 3-stage lookup table for the Unicode
/// General_Category property and writes it to the file named by the first
/// command-line argument.
///
/// Input:  data/unicode/extracted/DerivedGeneralCategory.txt (UCD).
/// Output: raw-deflate stream of: stage1 length + items (u16 each),
///         stage2 length (u16) + items (one u8 per u5 entry),
///         stage3 length (u8) + items (u8 each) — all in native endianness.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> general category (u5 enum ordinal)
    var flat_map = std.AutoHashMap(u21, u5).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedGeneralCategory.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): a single hex value or an inclusive
                    // "first..last" range.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    const gc = std.meta.stringToEnum(Gc, field) orelse return error.UnknownGenCat;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(gc));
                },
                else => {},
            }
        }
    }
    // Build the 3-stage table: stage1 maps block index -> stage2 offset,
    // stage2 holds deduplicated 256-entry blocks of stage3 indices, and
    // stage3 holds the distinct category values actually seen.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u5).init(allocator);
    defer stage2.deinit();
    var stage3 = std.ArrayList(u5).init(allocator);
    defer stage3.deinit();
    var block: Block = [_]u5{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        // DerivedGeneralCategory.txt covers every code point, so a missing
        // entry is a hard error (`.?` panics).
        const gc = flat_map.get(cp).?;
        // Linear search is fine here: stage3 holds at most 30 entries.
        const stage3_idx = blk: {
            for (stage3.items, 0..) |gci, j| {
                if (gc == gci) break :blk j;
            }
            try stage3.append(gc);
            break :blk stage3.items.len - 1;
        };
        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first (and only) command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness means the generated file is tied to
    // the build machine's byte order — the reader presumably does the same.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);
    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);
    try out_comp.flush();
}

133
deps/zg/codegen/hangul.zig vendored Normal file
View File

@@ -0,0 +1,133 @@
const std = @import("std");
const builtin = @import("builtin");
/// Hangul_Syllable_Type values. @intFromEnum (0-5) fits in a u3 and is
/// what the generated table stores, so this order must match the decoder.
const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};
// Code points are grouped into fixed-size blocks for the two-stage table.
const block_size = 256;
const Block = [block_size]u3;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 2-stage lookup table mapping each code point to its
/// Hangul_Syllable_Type (see the Syllable enum), read from
/// data/unicode/HangulSyllableType.txt, and writes it raw-deflate
/// compressed (native endianness) to the file named by the first
/// command-line argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> syllable type ordinal; unlisted points default to 0 (none)
    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process HangulSyllableType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single hex value or inclusive "first..last" range.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Syllable type
                    const st: Syllable = std.meta.stringToEnum(Syllable, field) orelse .none;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(st));
                },
                else => {},
            }
        }
    }
    // Build the 2-stage table: stage1 maps block index -> stage2 offset;
    // stage2 holds deduplicated 256-entry blocks of syllable-type values.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const st = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = st;
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness — file is tied to the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);
    try out_comp.flush();
}

57
deps/zg/codegen/lower.zig vendored Normal file
View File

@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");
/// Extracts simple lowercase mappings from data/unicode/UnicodeData.txt and
/// writes them, raw-deflate compressed, to the file named by the first
/// command-line argument. Each mapping is a pair of i24 values in native
/// endianness: the code point followed by the signed delta to its lowercase
/// form. A trailing 0 terminates the stream (U+0000 itself has no case
/// mapping, so 0 is unambiguous as a sentinel).
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),
                // Field 1 is the character name. Range placeholders and
                // controls use "<...>" names and carry no per-code-point
                // mappings, so skip those lines. (Bug fix: the previous
                // check `line[0] == '<'` tested the first byte of the whole
                // line — always a hex digit — so it never fired; those
                // lines were only skipped later via their empty field 13.)
                1 => if (field.len > 0 and field[0] == '<') continue :lines,
                13 => {
                    // Simple lowercase mapping; absent for most characters.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },
                else => {},
            }
        }
    }
    // Sentinel terminator for the stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}

134
deps/zg/codegen/normp.zig vendored Normal file
View File

@@ -0,0 +1,134 @@
const std = @import("std");
const builtin = @import("builtin");
// Code points are grouped into fixed-size blocks for the two-stage table.
const block_size = 256;
const Block = [block_size]u3;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 2-stage lookup table of normalization property
/// flags per code point, read from
/// data/unicode/DerivedNormalizationProps.txt, and writes it raw-deflate
/// compressed (native endianness) to the file named by the first
/// command-line argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> flag bits; unlisted points default to 0
    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedNormalizationProps.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single hex value or inclusive "first..last" range.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Norm props. A code point can appear on several lines,
                    // so the bits are OR-ed together:
                    // bit 0: NFD_QC, bit 1: NFKD_QC,
                    // bit 2: Full_Composition_Exclusion.
                    // Any value field after the property name is ignored.
                    for (current_code[0]..current_code[1] + 1) |cp| {
                        const gop = try flat_map.getOrPut(@intCast(cp));
                        if (!gop.found_existing) gop.value_ptr.* = 0;
                        if (std.mem.eql(u8, field, "NFD_QC")) {
                            gop.value_ptr.* |= 1;
                        } else if (std.mem.eql(u8, field, "NFKD_QC")) {
                            gop.value_ptr.* |= 2;
                        } else if (std.mem.eql(u8, field, "Full_Composition_Exclusion")) {
                            gop.value_ptr.* |= 4;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the 2-stage table: stage1 maps block index -> stage2 offset;
    // stage2 holds deduplicated 256-entry blocks of flag values.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const props = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = props;
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness — file is tied to the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);
    try out_comp.flush();
}

135
deps/zg/codegen/numeric.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// Code points are grouped into fixed-size blocks for the two-stage table.
const block_size = 256;
const Block = [block_size]u8;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 2-stage lookup table of numeric-type flags per code
/// point (bit 0: Numeric, bit 1: Digit, bit 2: Decimal), read from
/// data/unicode/extracted/DerivedNumericType.txt, and writes it raw-deflate
/// compressed (native endianness) to the file named by the first
/// command-line argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> numeric-type flag bits; unlisted points default to 0
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process DerivedNumericType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single hex value or inclusive "first..last" range.
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Numeric type; bits are OR-ed in case a code point
                    // appears on more than one line.
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "Numeric")) bit = 1;
                    if (mem.eql(u8, field, "Digit")) bit = 2;
                    if (mem.eql(u8, field, "Decimal")) bit = 4;
                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the 2-stage table: stage1 maps block index -> stage2 offset;
    // stage2 holds deduplicated 256-entry blocks of flag bytes.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const nt = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = nt;
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness — file is tied to the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

135
deps/zg/codegen/props.zig vendored Normal file
View File

@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
// Code points are grouped into fixed-size blocks for the two-stage table.
const block_size = 256;
const Block = [block_size]u8;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 2-stage lookup table of selected core property
/// flags per code point (bit 0: White_Space, bit 1: Hex_Digit,
/// bit 2: Diacritic), read from data/unicode/PropList.txt, and writes it
/// raw-deflate compressed (native endianness) to the file named by the
/// first command-line argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> property flag bits; unlisted points default to 0
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process PropList.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single hex value or inclusive "first..last" range.
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property; all other PropList properties are
                    // ignored. Bits are OR-ed since a code point may be
                    // listed under several properties.
                    var bit: u8 = 0;
                    if (mem.eql(u8, field, "White_Space")) bit = 1;
                    if (mem.eql(u8, field, "Hex_Digit")) bit = 2;
                    if (mem.eql(u8, field, "Diacritic")) bit = 4;
                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }
    // Build the 2-stage table: stage1 maps block index -> stage2 offset;
    // stage2 holds deduplicated 256-entry blocks of flag bytes.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;
        // Process block
        block[block_len] = prop;
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness — file is tied to the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);
    try out_comp.flush();
}

308
deps/zg/codegen/scripts.zig vendored Normal file
View File

@@ -0,0 +1,308 @@
const std = @import("std");
const builtin = @import("builtin");
/// Unicode Script property values. The declaration order fixes the numeric
/// value produced by @intFromEnum (u8), which the generated table stores,
/// so the decoder must use the same order. `none` covers code points with
/// no entry in Scripts.txt.
const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
// Code points are grouped into fixed-size blocks for the multi-stage table.
const block_size = 256;
const Block = [block_size]u8;
// Deduplicates identical blocks while building stage2: the key is a whole
// block, the value is that block's starting offset in stage2.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        // Hash the full array contents (DeepRecursive follows the elements).
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }
        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// Builds a compressed 3-stage lookup table mapping each code point to its
/// Script property, read from data/unicode/Scripts.txt, and writes it
/// raw-deflate compressed (native endianness) to the file named by the
/// first command-line argument.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // code point -> script ordinal; unlisted points default to 0 (none)
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();
    var line_buf: [4096]u8 = undefined;
    // Process Scripts.txt (comment previously said DerivedGeneralCategory.txt)
    var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        // Skip blank lines and full-line comments; strip trailing comments.
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): single hex value or inclusive "first..last" range.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Script: a name missing from the enum means the enum
                    // is out of date with the Unicode data — hard error.
                    const script = std.meta.stringToEnum(Script, field) orelse {
                        std.debug.print("Unknown script: {s}\n", .{field});
                        return error.UnknownScript;
                    };
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(script));
                },
                else => {},
            }
        }
    }
    // Build the 3-stage table: stage1 maps block index -> stage2 offset,
    // stage2 holds deduplicated 256-entry blocks of stage3 indices, and
    // stage3 holds the distinct script ordinals actually seen.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();
    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();
    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();
    var stage3 = std.ArrayList(u8).init(allocator);
    defer stage3.deinit();
    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const script = flat_map.get(cp) orelse 0;
        // Linear search is fine: stage3 holds at most one entry per script.
        const stage3_idx = blk: {
            for (stage3.items, 0..) |script_i, j| {
                if (script == script_i) break :blk j;
            }
            try stage3.append(script);
            break :blk stage3.items.len - 1;
        };
        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;
        // Flush only when the block fills (or at the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }
        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }
    // The output path is the first command-line argument.
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    // NOTE(review): native endianness — file is tied to the build machine.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);
    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);
    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);
    try out_comp.flush();
}

57
deps/zg/codegen/upper.zig vendored Normal file
View File

@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");
/// Extracts simple uppercase mappings from data/unicode/UnicodeData.txt and
/// writes them, raw-deflate compressed, to the file named by the first
/// command-line argument. Each mapping is a pair of i24 values in native
/// endianness: the code point followed by the signed delta to its uppercase
/// form. A trailing 0 terminates the stream (U+0000 itself has no case
/// mapping, so 0 is unambiguous as a sentinel).
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");
    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;
    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;
        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;
        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),
                // Field 1 is the character name. Range placeholders and
                // controls use "<...>" names and carry no per-code-point
                // mappings, so skip those lines. (Bug fix: the previous
                // check `line[0] == '<'` tested the first byte of the whole
                // line — always a hex digit — so it never fired; those
                // lines were only skipped later via their empty field 12.)
                1 => if (field.len > 0 and field[0] == '<') continue :lines,
                12 => {
                    // Simple uppercase mapping; absent for most characters.
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },
                else => {},
            }
        }
    }
    // Sentinel terminator for the stream.
    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}

77508
deps/zg/data/lang_mix.txt vendored Normal file

File diff suppressed because it is too large Load Diff

1627
deps/zg/data/unicode/CaseFolding.txt vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,858 @@
# HangulSyllableType-15.1.0.txt
# Date: 2023-01-05, 20:34:42 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
# ================================================
# Property: Hangul_Syllable_Type
# All code points not explicitly listed for Hangul_Syllable_Type
# have the value Not_Applicable (NA).
# @missing: 0000..10FFFF; Not_Applicable
# ================================================
# Hangul_Syllable_Type=Leading_Jamo
1100..115F ; L # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
# Total code points: 125
# ================================================
# Hangul_Syllable_Type=Vowel_Jamo
1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE
D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
# Total code points: 95
# ================================================
# Hangul_Syllable_Type=Trailing_Jamo
11A8..11FF ; T # Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
D7CB..D7FB ; T # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
# Total code points: 137
# ================================================
# Hangul_Syllable_Type=LV_Syllable
AC00 ; LV # Lo HANGUL SYLLABLE GA
AC1C ; LV # Lo HANGUL SYLLABLE GAE
AC38 ; LV # Lo HANGUL SYLLABLE GYA
AC54 ; LV # Lo HANGUL SYLLABLE GYAE
AC70 ; LV # Lo HANGUL SYLLABLE GEO
AC8C ; LV # Lo HANGUL SYLLABLE GE
ACA8 ; LV # Lo HANGUL SYLLABLE GYEO
ACC4 ; LV # Lo HANGUL SYLLABLE GYE
ACE0 ; LV # Lo HANGUL SYLLABLE GO
ACFC ; LV # Lo HANGUL SYLLABLE GWA
AD18 ; LV # Lo HANGUL SYLLABLE GWAE
AD34 ; LV # Lo HANGUL SYLLABLE GOE
AD50 ; LV # Lo HANGUL SYLLABLE GYO
AD6C ; LV # Lo HANGUL SYLLABLE GU
AD88 ; LV # Lo HANGUL SYLLABLE GWEO
ADA4 ; LV # Lo HANGUL SYLLABLE GWE
ADC0 ; LV # Lo HANGUL SYLLABLE GWI
ADDC ; LV # Lo HANGUL SYLLABLE GYU
ADF8 ; LV # Lo HANGUL SYLLABLE GEU
AE14 ; LV # Lo HANGUL SYLLABLE GYI
AE30 ; LV # Lo HANGUL SYLLABLE GI
AE4C ; LV # Lo HANGUL SYLLABLE GGA
AE68 ; LV # Lo HANGUL SYLLABLE GGAE
AE84 ; LV # Lo HANGUL SYLLABLE GGYA
AEA0 ; LV # Lo HANGUL SYLLABLE GGYAE
AEBC ; LV # Lo HANGUL SYLLABLE GGEO
AED8 ; LV # Lo HANGUL SYLLABLE GGE
AEF4 ; LV # Lo HANGUL SYLLABLE GGYEO
AF10 ; LV # Lo HANGUL SYLLABLE GGYE
AF2C ; LV # Lo HANGUL SYLLABLE GGO
AF48 ; LV # Lo HANGUL SYLLABLE GGWA
AF64 ; LV # Lo HANGUL SYLLABLE GGWAE
AF80 ; LV # Lo HANGUL SYLLABLE GGOE
AF9C ; LV # Lo HANGUL SYLLABLE GGYO
AFB8 ; LV # Lo HANGUL SYLLABLE GGU
AFD4 ; LV # Lo HANGUL SYLLABLE GGWEO
AFF0 ; LV # Lo HANGUL SYLLABLE GGWE
B00C ; LV # Lo HANGUL SYLLABLE GGWI
B028 ; LV # Lo HANGUL SYLLABLE GGYU
B044 ; LV # Lo HANGUL SYLLABLE GGEU
B060 ; LV # Lo HANGUL SYLLABLE GGYI
B07C ; LV # Lo HANGUL SYLLABLE GGI
B098 ; LV # Lo HANGUL SYLLABLE NA
B0B4 ; LV # Lo HANGUL SYLLABLE NAE
B0D0 ; LV # Lo HANGUL SYLLABLE NYA
B0EC ; LV # Lo HANGUL SYLLABLE NYAE
B108 ; LV # Lo HANGUL SYLLABLE NEO
B124 ; LV # Lo HANGUL SYLLABLE NE
B140 ; LV # Lo HANGUL SYLLABLE NYEO
B15C ; LV # Lo HANGUL SYLLABLE NYE
B178 ; LV # Lo HANGUL SYLLABLE NO
B194 ; LV # Lo HANGUL SYLLABLE NWA
B1B0 ; LV # Lo HANGUL SYLLABLE NWAE
B1CC ; LV # Lo HANGUL SYLLABLE NOE
B1E8 ; LV # Lo HANGUL SYLLABLE NYO
B204 ; LV # Lo HANGUL SYLLABLE NU
B220 ; LV # Lo HANGUL SYLLABLE NWEO
B23C ; LV # Lo HANGUL SYLLABLE NWE
B258 ; LV # Lo HANGUL SYLLABLE NWI
B274 ; LV # Lo HANGUL SYLLABLE NYU
B290 ; LV # Lo HANGUL SYLLABLE NEU
B2AC ; LV # Lo HANGUL SYLLABLE NYI
B2C8 ; LV # Lo HANGUL SYLLABLE NI
B2E4 ; LV # Lo HANGUL SYLLABLE DA
B300 ; LV # Lo HANGUL SYLLABLE DAE
B31C ; LV # Lo HANGUL SYLLABLE DYA
B338 ; LV # Lo HANGUL SYLLABLE DYAE
B354 ; LV # Lo HANGUL SYLLABLE DEO
B370 ; LV # Lo HANGUL SYLLABLE DE
B38C ; LV # Lo HANGUL SYLLABLE DYEO
B3A8 ; LV # Lo HANGUL SYLLABLE DYE
B3C4 ; LV # Lo HANGUL SYLLABLE DO
B3E0 ; LV # Lo HANGUL SYLLABLE DWA
B3FC ; LV # Lo HANGUL SYLLABLE DWAE
B418 ; LV # Lo HANGUL SYLLABLE DOE
B434 ; LV # Lo HANGUL SYLLABLE DYO
B450 ; LV # Lo HANGUL SYLLABLE DU
B46C ; LV # Lo HANGUL SYLLABLE DWEO
B488 ; LV # Lo HANGUL SYLLABLE DWE
B4A4 ; LV # Lo HANGUL SYLLABLE DWI
B4C0 ; LV # Lo HANGUL SYLLABLE DYU
B4DC ; LV # Lo HANGUL SYLLABLE DEU
B4F8 ; LV # Lo HANGUL SYLLABLE DYI
B514 ; LV # Lo HANGUL SYLLABLE DI
B530 ; LV # Lo HANGUL SYLLABLE DDA
B54C ; LV # Lo HANGUL SYLLABLE DDAE
B568 ; LV # Lo HANGUL SYLLABLE DDYA
B584 ; LV # Lo HANGUL SYLLABLE DDYAE
B5A0 ; LV # Lo HANGUL SYLLABLE DDEO
B5BC ; LV # Lo HANGUL SYLLABLE DDE
B5D8 ; LV # Lo HANGUL SYLLABLE DDYEO
B5F4 ; LV # Lo HANGUL SYLLABLE DDYE
B610 ; LV # Lo HANGUL SYLLABLE DDO
B62C ; LV # Lo HANGUL SYLLABLE DDWA
B648 ; LV # Lo HANGUL SYLLABLE DDWAE
B664 ; LV # Lo HANGUL SYLLABLE DDOE
B680 ; LV # Lo HANGUL SYLLABLE DDYO
B69C ; LV # Lo HANGUL SYLLABLE DDU
B6B8 ; LV # Lo HANGUL SYLLABLE DDWEO
B6D4 ; LV # Lo HANGUL SYLLABLE DDWE
B6F0 ; LV # Lo HANGUL SYLLABLE DDWI
B70C ; LV # Lo HANGUL SYLLABLE DDYU
B728 ; LV # Lo HANGUL SYLLABLE DDEU
B744 ; LV # Lo HANGUL SYLLABLE DDYI
B760 ; LV # Lo HANGUL SYLLABLE DDI
B77C ; LV # Lo HANGUL SYLLABLE RA
B798 ; LV # Lo HANGUL SYLLABLE RAE
B7B4 ; LV # Lo HANGUL SYLLABLE RYA
B7D0 ; LV # Lo HANGUL SYLLABLE RYAE
B7EC ; LV # Lo HANGUL SYLLABLE REO
B808 ; LV # Lo HANGUL SYLLABLE RE
B824 ; LV # Lo HANGUL SYLLABLE RYEO
B840 ; LV # Lo HANGUL SYLLABLE RYE
B85C ; LV # Lo HANGUL SYLLABLE RO
B878 ; LV # Lo HANGUL SYLLABLE RWA
B894 ; LV # Lo HANGUL SYLLABLE RWAE
B8B0 ; LV # Lo HANGUL SYLLABLE ROE
B8CC ; LV # Lo HANGUL SYLLABLE RYO
B8E8 ; LV # Lo HANGUL SYLLABLE RU
B904 ; LV # Lo HANGUL SYLLABLE RWEO
B920 ; LV # Lo HANGUL SYLLABLE RWE
B93C ; LV # Lo HANGUL SYLLABLE RWI
B958 ; LV # Lo HANGUL SYLLABLE RYU
B974 ; LV # Lo HANGUL SYLLABLE REU
B990 ; LV # Lo HANGUL SYLLABLE RYI
B9AC ; LV # Lo HANGUL SYLLABLE RI
B9C8 ; LV # Lo HANGUL SYLLABLE MA
B9E4 ; LV # Lo HANGUL SYLLABLE MAE
BA00 ; LV # Lo HANGUL SYLLABLE MYA
BA1C ; LV # Lo HANGUL SYLLABLE MYAE
BA38 ; LV # Lo HANGUL SYLLABLE MEO
BA54 ; LV # Lo HANGUL SYLLABLE ME
BA70 ; LV # Lo HANGUL SYLLABLE MYEO
BA8C ; LV # Lo HANGUL SYLLABLE MYE
BAA8 ; LV # Lo HANGUL SYLLABLE MO
BAC4 ; LV # Lo HANGUL SYLLABLE MWA
BAE0 ; LV # Lo HANGUL SYLLABLE MWAE
BAFC ; LV # Lo HANGUL SYLLABLE MOE
BB18 ; LV # Lo HANGUL SYLLABLE MYO
BB34 ; LV # Lo HANGUL SYLLABLE MU
BB50 ; LV # Lo HANGUL SYLLABLE MWEO
BB6C ; LV # Lo HANGUL SYLLABLE MWE
BB88 ; LV # Lo HANGUL SYLLABLE MWI
BBA4 ; LV # Lo HANGUL SYLLABLE MYU
BBC0 ; LV # Lo HANGUL SYLLABLE MEU
BBDC ; LV # Lo HANGUL SYLLABLE MYI
BBF8 ; LV # Lo HANGUL SYLLABLE MI
BC14 ; LV # Lo HANGUL SYLLABLE BA
BC30 ; LV # Lo HANGUL SYLLABLE BAE
BC4C ; LV # Lo HANGUL SYLLABLE BYA
BC68 ; LV # Lo HANGUL SYLLABLE BYAE
BC84 ; LV # Lo HANGUL SYLLABLE BEO
BCA0 ; LV # Lo HANGUL SYLLABLE BE
BCBC ; LV # Lo HANGUL SYLLABLE BYEO
BCD8 ; LV # Lo HANGUL SYLLABLE BYE
BCF4 ; LV # Lo HANGUL SYLLABLE BO
BD10 ; LV # Lo HANGUL SYLLABLE BWA
BD2C ; LV # Lo HANGUL SYLLABLE BWAE
BD48 ; LV # Lo HANGUL SYLLABLE BOE
BD64 ; LV # Lo HANGUL SYLLABLE BYO
BD80 ; LV # Lo HANGUL SYLLABLE BU
BD9C ; LV # Lo HANGUL SYLLABLE BWEO
BDB8 ; LV # Lo HANGUL SYLLABLE BWE
BDD4 ; LV # Lo HANGUL SYLLABLE BWI
BDF0 ; LV # Lo HANGUL SYLLABLE BYU
BE0C ; LV # Lo HANGUL SYLLABLE BEU
BE28 ; LV # Lo HANGUL SYLLABLE BYI
BE44 ; LV # Lo HANGUL SYLLABLE BI
BE60 ; LV # Lo HANGUL SYLLABLE BBA
BE7C ; LV # Lo HANGUL SYLLABLE BBAE
BE98 ; LV # Lo HANGUL SYLLABLE BBYA
BEB4 ; LV # Lo HANGUL SYLLABLE BBYAE
BED0 ; LV # Lo HANGUL SYLLABLE BBEO
BEEC ; LV # Lo HANGUL SYLLABLE BBE
BF08 ; LV # Lo HANGUL SYLLABLE BBYEO
BF24 ; LV # Lo HANGUL SYLLABLE BBYE
BF40 ; LV # Lo HANGUL SYLLABLE BBO
BF5C ; LV # Lo HANGUL SYLLABLE BBWA
BF78 ; LV # Lo HANGUL SYLLABLE BBWAE
BF94 ; LV # Lo HANGUL SYLLABLE BBOE
BFB0 ; LV # Lo HANGUL SYLLABLE BBYO
BFCC ; LV # Lo HANGUL SYLLABLE BBU
BFE8 ; LV # Lo HANGUL SYLLABLE BBWEO
C004 ; LV # Lo HANGUL SYLLABLE BBWE
C020 ; LV # Lo HANGUL SYLLABLE BBWI
C03C ; LV # Lo HANGUL SYLLABLE BBYU
C058 ; LV # Lo HANGUL SYLLABLE BBEU
C074 ; LV # Lo HANGUL SYLLABLE BBYI
C090 ; LV # Lo HANGUL SYLLABLE BBI
C0AC ; LV # Lo HANGUL SYLLABLE SA
C0C8 ; LV # Lo HANGUL SYLLABLE SAE
C0E4 ; LV # Lo HANGUL SYLLABLE SYA
C100 ; LV # Lo HANGUL SYLLABLE SYAE
C11C ; LV # Lo HANGUL SYLLABLE SEO
C138 ; LV # Lo HANGUL SYLLABLE SE
C154 ; LV # Lo HANGUL SYLLABLE SYEO
C170 ; LV # Lo HANGUL SYLLABLE SYE
C18C ; LV # Lo HANGUL SYLLABLE SO
C1A8 ; LV # Lo HANGUL SYLLABLE SWA
C1C4 ; LV # Lo HANGUL SYLLABLE SWAE
C1E0 ; LV # Lo HANGUL SYLLABLE SOE
C1FC ; LV # Lo HANGUL SYLLABLE SYO
C218 ; LV # Lo HANGUL SYLLABLE SU
C234 ; LV # Lo HANGUL SYLLABLE SWEO
C250 ; LV # Lo HANGUL SYLLABLE SWE
C26C ; LV # Lo HANGUL SYLLABLE SWI
C288 ; LV # Lo HANGUL SYLLABLE SYU
C2A4 ; LV # Lo HANGUL SYLLABLE SEU
C2C0 ; LV # Lo HANGUL SYLLABLE SYI
C2DC ; LV # Lo HANGUL SYLLABLE SI
C2F8 ; LV # Lo HANGUL SYLLABLE SSA
C314 ; LV # Lo HANGUL SYLLABLE SSAE
C330 ; LV # Lo HANGUL SYLLABLE SSYA
C34C ; LV # Lo HANGUL SYLLABLE SSYAE
C368 ; LV # Lo HANGUL SYLLABLE SSEO
C384 ; LV # Lo HANGUL SYLLABLE SSE
C3A0 ; LV # Lo HANGUL SYLLABLE SSYEO
C3BC ; LV # Lo HANGUL SYLLABLE SSYE
C3D8 ; LV # Lo HANGUL SYLLABLE SSO
C3F4 ; LV # Lo HANGUL SYLLABLE SSWA
C410 ; LV # Lo HANGUL SYLLABLE SSWAE
C42C ; LV # Lo HANGUL SYLLABLE SSOE
C448 ; LV # Lo HANGUL SYLLABLE SSYO
C464 ; LV # Lo HANGUL SYLLABLE SSU
C480 ; LV # Lo HANGUL SYLLABLE SSWEO
C49C ; LV # Lo HANGUL SYLLABLE SSWE
C4B8 ; LV # Lo HANGUL SYLLABLE SSWI
C4D4 ; LV # Lo HANGUL SYLLABLE SSYU
C4F0 ; LV # Lo HANGUL SYLLABLE SSEU
C50C ; LV # Lo HANGUL SYLLABLE SSYI
C528 ; LV # Lo HANGUL SYLLABLE SSI
C544 ; LV # Lo HANGUL SYLLABLE A
C560 ; LV # Lo HANGUL SYLLABLE AE
C57C ; LV # Lo HANGUL SYLLABLE YA
C598 ; LV # Lo HANGUL SYLLABLE YAE
C5B4 ; LV # Lo HANGUL SYLLABLE EO
C5D0 ; LV # Lo HANGUL SYLLABLE E
C5EC ; LV # Lo HANGUL SYLLABLE YEO
C608 ; LV # Lo HANGUL SYLLABLE YE
C624 ; LV # Lo HANGUL SYLLABLE O
C640 ; LV # Lo HANGUL SYLLABLE WA
C65C ; LV # Lo HANGUL SYLLABLE WAE
C678 ; LV # Lo HANGUL SYLLABLE OE
C694 ; LV # Lo HANGUL SYLLABLE YO
C6B0 ; LV # Lo HANGUL SYLLABLE U
C6CC ; LV # Lo HANGUL SYLLABLE WEO
C6E8 ; LV # Lo HANGUL SYLLABLE WE
C704 ; LV # Lo HANGUL SYLLABLE WI
C720 ; LV # Lo HANGUL SYLLABLE YU
C73C ; LV # Lo HANGUL SYLLABLE EU
C758 ; LV # Lo HANGUL SYLLABLE YI
C774 ; LV # Lo HANGUL SYLLABLE I
C790 ; LV # Lo HANGUL SYLLABLE JA
C7AC ; LV # Lo HANGUL SYLLABLE JAE
C7C8 ; LV # Lo HANGUL SYLLABLE JYA
C7E4 ; LV # Lo HANGUL SYLLABLE JYAE
C800 ; LV # Lo HANGUL SYLLABLE JEO
C81C ; LV # Lo HANGUL SYLLABLE JE
C838 ; LV # Lo HANGUL SYLLABLE JYEO
C854 ; LV # Lo HANGUL SYLLABLE JYE
C870 ; LV # Lo HANGUL SYLLABLE JO
C88C ; LV # Lo HANGUL SYLLABLE JWA
C8A8 ; LV # Lo HANGUL SYLLABLE JWAE
C8C4 ; LV # Lo HANGUL SYLLABLE JOE
C8E0 ; LV # Lo HANGUL SYLLABLE JYO
C8FC ; LV # Lo HANGUL SYLLABLE JU
C918 ; LV # Lo HANGUL SYLLABLE JWEO
C934 ; LV # Lo HANGUL SYLLABLE JWE
C950 ; LV # Lo HANGUL SYLLABLE JWI
C96C ; LV # Lo HANGUL SYLLABLE JYU
C988 ; LV # Lo HANGUL SYLLABLE JEU
C9A4 ; LV # Lo HANGUL SYLLABLE JYI
C9C0 ; LV # Lo HANGUL SYLLABLE JI
C9DC ; LV # Lo HANGUL SYLLABLE JJA
C9F8 ; LV # Lo HANGUL SYLLABLE JJAE
CA14 ; LV # Lo HANGUL SYLLABLE JJYA
CA30 ; LV # Lo HANGUL SYLLABLE JJYAE
CA4C ; LV # Lo HANGUL SYLLABLE JJEO
CA68 ; LV # Lo HANGUL SYLLABLE JJE
CA84 ; LV # Lo HANGUL SYLLABLE JJYEO
CAA0 ; LV # Lo HANGUL SYLLABLE JJYE
CABC ; LV # Lo HANGUL SYLLABLE JJO
CAD8 ; LV # Lo HANGUL SYLLABLE JJWA
CAF4 ; LV # Lo HANGUL SYLLABLE JJWAE
CB10 ; LV # Lo HANGUL SYLLABLE JJOE
CB2C ; LV # Lo HANGUL SYLLABLE JJYO
CB48 ; LV # Lo HANGUL SYLLABLE JJU
CB64 ; LV # Lo HANGUL SYLLABLE JJWEO
CB80 ; LV # Lo HANGUL SYLLABLE JJWE
CB9C ; LV # Lo HANGUL SYLLABLE JJWI
CBB8 ; LV # Lo HANGUL SYLLABLE JJYU
CBD4 ; LV # Lo HANGUL SYLLABLE JJEU
CBF0 ; LV # Lo HANGUL SYLLABLE JJYI
CC0C ; LV # Lo HANGUL SYLLABLE JJI
CC28 ; LV # Lo HANGUL SYLLABLE CA
CC44 ; LV # Lo HANGUL SYLLABLE CAE
CC60 ; LV # Lo HANGUL SYLLABLE CYA
CC7C ; LV # Lo HANGUL SYLLABLE CYAE
CC98 ; LV # Lo HANGUL SYLLABLE CEO
CCB4 ; LV # Lo HANGUL SYLLABLE CE
CCD0 ; LV # Lo HANGUL SYLLABLE CYEO
CCEC ; LV # Lo HANGUL SYLLABLE CYE
CD08 ; LV # Lo HANGUL SYLLABLE CO
CD24 ; LV # Lo HANGUL SYLLABLE CWA
CD40 ; LV # Lo HANGUL SYLLABLE CWAE
CD5C ; LV # Lo HANGUL SYLLABLE COE
CD78 ; LV # Lo HANGUL SYLLABLE CYO
CD94 ; LV # Lo HANGUL SYLLABLE CU
CDB0 ; LV # Lo HANGUL SYLLABLE CWEO
CDCC ; LV # Lo HANGUL SYLLABLE CWE
CDE8 ; LV # Lo HANGUL SYLLABLE CWI
CE04 ; LV # Lo HANGUL SYLLABLE CYU
CE20 ; LV # Lo HANGUL SYLLABLE CEU
CE3C ; LV # Lo HANGUL SYLLABLE CYI
CE58 ; LV # Lo HANGUL SYLLABLE CI
CE74 ; LV # Lo HANGUL SYLLABLE KA
CE90 ; LV # Lo HANGUL SYLLABLE KAE
CEAC ; LV # Lo HANGUL SYLLABLE KYA
CEC8 ; LV # Lo HANGUL SYLLABLE KYAE
CEE4 ; LV # Lo HANGUL SYLLABLE KEO
CF00 ; LV # Lo HANGUL SYLLABLE KE
CF1C ; LV # Lo HANGUL SYLLABLE KYEO
CF38 ; LV # Lo HANGUL SYLLABLE KYE
CF54 ; LV # Lo HANGUL SYLLABLE KO
CF70 ; LV # Lo HANGUL SYLLABLE KWA
CF8C ; LV # Lo HANGUL SYLLABLE KWAE
CFA8 ; LV # Lo HANGUL SYLLABLE KOE
CFC4 ; LV # Lo HANGUL SYLLABLE KYO
CFE0 ; LV # Lo HANGUL SYLLABLE KU
CFFC ; LV # Lo HANGUL SYLLABLE KWEO
D018 ; LV # Lo HANGUL SYLLABLE KWE
D034 ; LV # Lo HANGUL SYLLABLE KWI
D050 ; LV # Lo HANGUL SYLLABLE KYU
D06C ; LV # Lo HANGUL SYLLABLE KEU
D088 ; LV # Lo HANGUL SYLLABLE KYI
D0A4 ; LV # Lo HANGUL SYLLABLE KI
D0C0 ; LV # Lo HANGUL SYLLABLE TA
D0DC ; LV # Lo HANGUL SYLLABLE TAE
D0F8 ; LV # Lo HANGUL SYLLABLE TYA
D114 ; LV # Lo HANGUL SYLLABLE TYAE
D130 ; LV # Lo HANGUL SYLLABLE TEO
D14C ; LV # Lo HANGUL SYLLABLE TE
D168 ; LV # Lo HANGUL SYLLABLE TYEO
D184 ; LV # Lo HANGUL SYLLABLE TYE
D1A0 ; LV # Lo HANGUL SYLLABLE TO
D1BC ; LV # Lo HANGUL SYLLABLE TWA
D1D8 ; LV # Lo HANGUL SYLLABLE TWAE
D1F4 ; LV # Lo HANGUL SYLLABLE TOE
D210 ; LV # Lo HANGUL SYLLABLE TYO
D22C ; LV # Lo HANGUL SYLLABLE TU
D248 ; LV # Lo HANGUL SYLLABLE TWEO
D264 ; LV # Lo HANGUL SYLLABLE TWE
D280 ; LV # Lo HANGUL SYLLABLE TWI
D29C ; LV # Lo HANGUL SYLLABLE TYU
D2B8 ; LV # Lo HANGUL SYLLABLE TEU
D2D4 ; LV # Lo HANGUL SYLLABLE TYI
D2F0 ; LV # Lo HANGUL SYLLABLE TI
D30C ; LV # Lo HANGUL SYLLABLE PA
D328 ; LV # Lo HANGUL SYLLABLE PAE
D344 ; LV # Lo HANGUL SYLLABLE PYA
D360 ; LV # Lo HANGUL SYLLABLE PYAE
D37C ; LV # Lo HANGUL SYLLABLE PEO
D398 ; LV # Lo HANGUL SYLLABLE PE
D3B4 ; LV # Lo HANGUL SYLLABLE PYEO
D3D0 ; LV # Lo HANGUL SYLLABLE PYE
D3EC ; LV # Lo HANGUL SYLLABLE PO
D408 ; LV # Lo HANGUL SYLLABLE PWA
D424 ; LV # Lo HANGUL SYLLABLE PWAE
D440 ; LV # Lo HANGUL SYLLABLE POE
D45C ; LV # Lo HANGUL SYLLABLE PYO
D478 ; LV # Lo HANGUL SYLLABLE PU
D494 ; LV # Lo HANGUL SYLLABLE PWEO
D4B0 ; LV # Lo HANGUL SYLLABLE PWE
D4CC ; LV # Lo HANGUL SYLLABLE PWI
D4E8 ; LV # Lo HANGUL SYLLABLE PYU
D504 ; LV # Lo HANGUL SYLLABLE PEU
D520 ; LV # Lo HANGUL SYLLABLE PYI
D53C ; LV # Lo HANGUL SYLLABLE PI
D558 ; LV # Lo HANGUL SYLLABLE HA
D574 ; LV # Lo HANGUL SYLLABLE HAE
D590 ; LV # Lo HANGUL SYLLABLE HYA
D5AC ; LV # Lo HANGUL SYLLABLE HYAE
D5C8 ; LV # Lo HANGUL SYLLABLE HEO
D5E4 ; LV # Lo HANGUL SYLLABLE HE
D600 ; LV # Lo HANGUL SYLLABLE HYEO
D61C ; LV # Lo HANGUL SYLLABLE HYE
D638 ; LV # Lo HANGUL SYLLABLE HO
D654 ; LV # Lo HANGUL SYLLABLE HWA
D670 ; LV # Lo HANGUL SYLLABLE HWAE
D68C ; LV # Lo HANGUL SYLLABLE HOE
D6A8 ; LV # Lo HANGUL SYLLABLE HYO
D6C4 ; LV # Lo HANGUL SYLLABLE HU
D6E0 ; LV # Lo HANGUL SYLLABLE HWEO
D6FC ; LV # Lo HANGUL SYLLABLE HWE
D718 ; LV # Lo HANGUL SYLLABLE HWI
D734 ; LV # Lo HANGUL SYLLABLE HYU
D750 ; LV # Lo HANGUL SYLLABLE HEU
D76C ; LV # Lo HANGUL SYLLABLE HYI
D788 ; LV # Lo HANGUL SYLLABLE HI
# Total code points: 399
# ================================================
# Hangul_Syllable_Type=LVT_Syllable
AC01..AC1B ; LVT # Lo [27] HANGUL SYLLABLE GAG..HANGUL SYLLABLE GAH
AC1D..AC37 ; LVT # Lo [27] HANGUL SYLLABLE GAEG..HANGUL SYLLABLE GAEH
AC39..AC53 ; LVT # Lo [27] HANGUL SYLLABLE GYAG..HANGUL SYLLABLE GYAH
AC55..AC6F ; LVT # Lo [27] HANGUL SYLLABLE GYAEG..HANGUL SYLLABLE GYAEH
AC71..AC8B ; LVT # Lo [27] HANGUL SYLLABLE GEOG..HANGUL SYLLABLE GEOH
AC8D..ACA7 ; LVT # Lo [27] HANGUL SYLLABLE GEG..HANGUL SYLLABLE GEH
ACA9..ACC3 ; LVT # Lo [27] HANGUL SYLLABLE GYEOG..HANGUL SYLLABLE GYEOH
ACC5..ACDF ; LVT # Lo [27] HANGUL SYLLABLE GYEG..HANGUL SYLLABLE GYEH
ACE1..ACFB ; LVT # Lo [27] HANGUL SYLLABLE GOG..HANGUL SYLLABLE GOH
ACFD..AD17 ; LVT # Lo [27] HANGUL SYLLABLE GWAG..HANGUL SYLLABLE GWAH
AD19..AD33 ; LVT # Lo [27] HANGUL SYLLABLE GWAEG..HANGUL SYLLABLE GWAEH
AD35..AD4F ; LVT # Lo [27] HANGUL SYLLABLE GOEG..HANGUL SYLLABLE GOEH
AD51..AD6B ; LVT # Lo [27] HANGUL SYLLABLE GYOG..HANGUL SYLLABLE GYOH
AD6D..AD87 ; LVT # Lo [27] HANGUL SYLLABLE GUG..HANGUL SYLLABLE GUH
AD89..ADA3 ; LVT # Lo [27] HANGUL SYLLABLE GWEOG..HANGUL SYLLABLE GWEOH
ADA5..ADBF ; LVT # Lo [27] HANGUL SYLLABLE GWEG..HANGUL SYLLABLE GWEH
ADC1..ADDB ; LVT # Lo [27] HANGUL SYLLABLE GWIG..HANGUL SYLLABLE GWIH
ADDD..ADF7 ; LVT # Lo [27] HANGUL SYLLABLE GYUG..HANGUL SYLLABLE GYUH
ADF9..AE13 ; LVT # Lo [27] HANGUL SYLLABLE GEUG..HANGUL SYLLABLE GEUH
AE15..AE2F ; LVT # Lo [27] HANGUL SYLLABLE GYIG..HANGUL SYLLABLE GYIH
AE31..AE4B ; LVT # Lo [27] HANGUL SYLLABLE GIG..HANGUL SYLLABLE GIH
AE4D..AE67 ; LVT # Lo [27] HANGUL SYLLABLE GGAG..HANGUL SYLLABLE GGAH
AE69..AE83 ; LVT # Lo [27] HANGUL SYLLABLE GGAEG..HANGUL SYLLABLE GGAEH
AE85..AE9F ; LVT # Lo [27] HANGUL SYLLABLE GGYAG..HANGUL SYLLABLE GGYAH
AEA1..AEBB ; LVT # Lo [27] HANGUL SYLLABLE GGYAEG..HANGUL SYLLABLE GGYAEH
AEBD..AED7 ; LVT # Lo [27] HANGUL SYLLABLE GGEOG..HANGUL SYLLABLE GGEOH
AED9..AEF3 ; LVT # Lo [27] HANGUL SYLLABLE GGEG..HANGUL SYLLABLE GGEH
AEF5..AF0F ; LVT # Lo [27] HANGUL SYLLABLE GGYEOG..HANGUL SYLLABLE GGYEOH
AF11..AF2B ; LVT # Lo [27] HANGUL SYLLABLE GGYEG..HANGUL SYLLABLE GGYEH
AF2D..AF47 ; LVT # Lo [27] HANGUL SYLLABLE GGOG..HANGUL SYLLABLE GGOH
AF49..AF63 ; LVT # Lo [27] HANGUL SYLLABLE GGWAG..HANGUL SYLLABLE GGWAH
AF65..AF7F ; LVT # Lo [27] HANGUL SYLLABLE GGWAEG..HANGUL SYLLABLE GGWAEH
AF81..AF9B ; LVT # Lo [27] HANGUL SYLLABLE GGOEG..HANGUL SYLLABLE GGOEH
AF9D..AFB7 ; LVT # Lo [27] HANGUL SYLLABLE GGYOG..HANGUL SYLLABLE GGYOH
AFB9..AFD3 ; LVT # Lo [27] HANGUL SYLLABLE GGUG..HANGUL SYLLABLE GGUH
AFD5..AFEF ; LVT # Lo [27] HANGUL SYLLABLE GGWEOG..HANGUL SYLLABLE GGWEOH
AFF1..B00B ; LVT # Lo [27] HANGUL SYLLABLE GGWEG..HANGUL SYLLABLE GGWEH
B00D..B027 ; LVT # Lo [27] HANGUL SYLLABLE GGWIG..HANGUL SYLLABLE GGWIH
B029..B043 ; LVT # Lo [27] HANGUL SYLLABLE GGYUG..HANGUL SYLLABLE GGYUH
B045..B05F ; LVT # Lo [27] HANGUL SYLLABLE GGEUG..HANGUL SYLLABLE GGEUH
B061..B07B ; LVT # Lo [27] HANGUL SYLLABLE GGYIG..HANGUL SYLLABLE GGYIH
B07D..B097 ; LVT # Lo [27] HANGUL SYLLABLE GGIG..HANGUL SYLLABLE GGIH
B099..B0B3 ; LVT # Lo [27] HANGUL SYLLABLE NAG..HANGUL SYLLABLE NAH
B0B5..B0CF ; LVT # Lo [27] HANGUL SYLLABLE NAEG..HANGUL SYLLABLE NAEH
B0D1..B0EB ; LVT # Lo [27] HANGUL SYLLABLE NYAG..HANGUL SYLLABLE NYAH
B0ED..B107 ; LVT # Lo [27] HANGUL SYLLABLE NYAEG..HANGUL SYLLABLE NYAEH
B109..B123 ; LVT # Lo [27] HANGUL SYLLABLE NEOG..HANGUL SYLLABLE NEOH
B125..B13F ; LVT # Lo [27] HANGUL SYLLABLE NEG..HANGUL SYLLABLE NEH
B141..B15B ; LVT # Lo [27] HANGUL SYLLABLE NYEOG..HANGUL SYLLABLE NYEOH
B15D..B177 ; LVT # Lo [27] HANGUL SYLLABLE NYEG..HANGUL SYLLABLE NYEH
B179..B193 ; LVT # Lo [27] HANGUL SYLLABLE NOG..HANGUL SYLLABLE NOH
B195..B1AF ; LVT # Lo [27] HANGUL SYLLABLE NWAG..HANGUL SYLLABLE NWAH
B1B1..B1CB ; LVT # Lo [27] HANGUL SYLLABLE NWAEG..HANGUL SYLLABLE NWAEH
B1CD..B1E7 ; LVT # Lo [27] HANGUL SYLLABLE NOEG..HANGUL SYLLABLE NOEH
B1E9..B203 ; LVT # Lo [27] HANGUL SYLLABLE NYOG..HANGUL SYLLABLE NYOH
B205..B21F ; LVT # Lo [27] HANGUL SYLLABLE NUG..HANGUL SYLLABLE NUH
B221..B23B ; LVT # Lo [27] HANGUL SYLLABLE NWEOG..HANGUL SYLLABLE NWEOH
B23D..B257 ; LVT # Lo [27] HANGUL SYLLABLE NWEG..HANGUL SYLLABLE NWEH
B259..B273 ; LVT # Lo [27] HANGUL SYLLABLE NWIG..HANGUL SYLLABLE NWIH
B275..B28F ; LVT # Lo [27] HANGUL SYLLABLE NYUG..HANGUL SYLLABLE NYUH
B291..B2AB ; LVT # Lo [27] HANGUL SYLLABLE NEUG..HANGUL SYLLABLE NEUH
B2AD..B2C7 ; LVT # Lo [27] HANGUL SYLLABLE NYIG..HANGUL SYLLABLE NYIH
B2C9..B2E3 ; LVT # Lo [27] HANGUL SYLLABLE NIG..HANGUL SYLLABLE NIH
B2E5..B2FF ; LVT # Lo [27] HANGUL SYLLABLE DAG..HANGUL SYLLABLE DAH
B301..B31B ; LVT # Lo [27] HANGUL SYLLABLE DAEG..HANGUL SYLLABLE DAEH
B31D..B337 ; LVT # Lo [27] HANGUL SYLLABLE DYAG..HANGUL SYLLABLE DYAH
B339..B353 ; LVT # Lo [27] HANGUL SYLLABLE DYAEG..HANGUL SYLLABLE DYAEH
B355..B36F ; LVT # Lo [27] HANGUL SYLLABLE DEOG..HANGUL SYLLABLE DEOH
B371..B38B ; LVT # Lo [27] HANGUL SYLLABLE DEG..HANGUL SYLLABLE DEH
B38D..B3A7 ; LVT # Lo [27] HANGUL SYLLABLE DYEOG..HANGUL SYLLABLE DYEOH
B3A9..B3C3 ; LVT # Lo [27] HANGUL SYLLABLE DYEG..HANGUL SYLLABLE DYEH
B3C5..B3DF ; LVT # Lo [27] HANGUL SYLLABLE DOG..HANGUL SYLLABLE DOH
B3E1..B3FB ; LVT # Lo [27] HANGUL SYLLABLE DWAG..HANGUL SYLLABLE DWAH
B3FD..B417 ; LVT # Lo [27] HANGUL SYLLABLE DWAEG..HANGUL SYLLABLE DWAEH
B419..B433 ; LVT # Lo [27] HANGUL SYLLABLE DOEG..HANGUL SYLLABLE DOEH
B435..B44F ; LVT # Lo [27] HANGUL SYLLABLE DYOG..HANGUL SYLLABLE DYOH
B451..B46B ; LVT # Lo [27] HANGUL SYLLABLE DUG..HANGUL SYLLABLE DUH
B46D..B487 ; LVT # Lo [27] HANGUL SYLLABLE DWEOG..HANGUL SYLLABLE DWEOH
B489..B4A3 ; LVT # Lo [27] HANGUL SYLLABLE DWEG..HANGUL SYLLABLE DWEH
B4A5..B4BF ; LVT # Lo [27] HANGUL SYLLABLE DWIG..HANGUL SYLLABLE DWIH
B4C1..B4DB ; LVT # Lo [27] HANGUL SYLLABLE DYUG..HANGUL SYLLABLE DYUH
B4DD..B4F7 ; LVT # Lo [27] HANGUL SYLLABLE DEUG..HANGUL SYLLABLE DEUH
B4F9..B513 ; LVT # Lo [27] HANGUL SYLLABLE DYIG..HANGUL SYLLABLE DYIH
B515..B52F ; LVT # Lo [27] HANGUL SYLLABLE DIG..HANGUL SYLLABLE DIH
B531..B54B ; LVT # Lo [27] HANGUL SYLLABLE DDAG..HANGUL SYLLABLE DDAH
B54D..B567 ; LVT # Lo [27] HANGUL SYLLABLE DDAEG..HANGUL SYLLABLE DDAEH
B569..B583 ; LVT # Lo [27] HANGUL SYLLABLE DDYAG..HANGUL SYLLABLE DDYAH
B585..B59F ; LVT # Lo [27] HANGUL SYLLABLE DDYAEG..HANGUL SYLLABLE DDYAEH
B5A1..B5BB ; LVT # Lo [27] HANGUL SYLLABLE DDEOG..HANGUL SYLLABLE DDEOH
B5BD..B5D7 ; LVT # Lo [27] HANGUL SYLLABLE DDEG..HANGUL SYLLABLE DDEH
B5D9..B5F3 ; LVT # Lo [27] HANGUL SYLLABLE DDYEOG..HANGUL SYLLABLE DDYEOH
B5F5..B60F ; LVT # Lo [27] HANGUL SYLLABLE DDYEG..HANGUL SYLLABLE DDYEH
B611..B62B ; LVT # Lo [27] HANGUL SYLLABLE DDOG..HANGUL SYLLABLE DDOH
B62D..B647 ; LVT # Lo [27] HANGUL SYLLABLE DDWAG..HANGUL SYLLABLE DDWAH
B649..B663 ; LVT # Lo [27] HANGUL SYLLABLE DDWAEG..HANGUL SYLLABLE DDWAEH
B665..B67F ; LVT # Lo [27] HANGUL SYLLABLE DDOEG..HANGUL SYLLABLE DDOEH
B681..B69B ; LVT # Lo [27] HANGUL SYLLABLE DDYOG..HANGUL SYLLABLE DDYOH
B69D..B6B7 ; LVT # Lo [27] HANGUL SYLLABLE DDUG..HANGUL SYLLABLE DDUH
B6B9..B6D3 ; LVT # Lo [27] HANGUL SYLLABLE DDWEOG..HANGUL SYLLABLE DDWEOH
B6D5..B6EF ; LVT # Lo [27] HANGUL SYLLABLE DDWEG..HANGUL SYLLABLE DDWEH
B6F1..B70B ; LVT # Lo [27] HANGUL SYLLABLE DDWIG..HANGUL SYLLABLE DDWIH
B70D..B727 ; LVT # Lo [27] HANGUL SYLLABLE DDYUG..HANGUL SYLLABLE DDYUH
B729..B743 ; LVT # Lo [27] HANGUL SYLLABLE DDEUG..HANGUL SYLLABLE DDEUH
B745..B75F ; LVT # Lo [27] HANGUL SYLLABLE DDYIG..HANGUL SYLLABLE DDYIH
B761..B77B ; LVT # Lo [27] HANGUL SYLLABLE DDIG..HANGUL SYLLABLE DDIH
B77D..B797 ; LVT # Lo [27] HANGUL SYLLABLE RAG..HANGUL SYLLABLE RAH
B799..B7B3 ; LVT # Lo [27] HANGUL SYLLABLE RAEG..HANGUL SYLLABLE RAEH
B7B5..B7CF ; LVT # Lo [27] HANGUL SYLLABLE RYAG..HANGUL SYLLABLE RYAH
B7D1..B7EB ; LVT # Lo [27] HANGUL SYLLABLE RYAEG..HANGUL SYLLABLE RYAEH
B7ED..B807 ; LVT # Lo [27] HANGUL SYLLABLE REOG..HANGUL SYLLABLE REOH
B809..B823 ; LVT # Lo [27] HANGUL SYLLABLE REG..HANGUL SYLLABLE REH
B825..B83F ; LVT # Lo [27] HANGUL SYLLABLE RYEOG..HANGUL SYLLABLE RYEOH
B841..B85B ; LVT # Lo [27] HANGUL SYLLABLE RYEG..HANGUL SYLLABLE RYEH
B85D..B877 ; LVT # Lo [27] HANGUL SYLLABLE ROG..HANGUL SYLLABLE ROH
B879..B893 ; LVT # Lo [27] HANGUL SYLLABLE RWAG..HANGUL SYLLABLE RWAH
B895..B8AF ; LVT # Lo [27] HANGUL SYLLABLE RWAEG..HANGUL SYLLABLE RWAEH
B8B1..B8CB ; LVT # Lo [27] HANGUL SYLLABLE ROEG..HANGUL SYLLABLE ROEH
B8CD..B8E7 ; LVT # Lo [27] HANGUL SYLLABLE RYOG..HANGUL SYLLABLE RYOH
B8E9..B903 ; LVT # Lo [27] HANGUL SYLLABLE RUG..HANGUL SYLLABLE RUH
B905..B91F ; LVT # Lo [27] HANGUL SYLLABLE RWEOG..HANGUL SYLLABLE RWEOH
B921..B93B ; LVT # Lo [27] HANGUL SYLLABLE RWEG..HANGUL SYLLABLE RWEH
B93D..B957 ; LVT # Lo [27] HANGUL SYLLABLE RWIG..HANGUL SYLLABLE RWIH
B959..B973 ; LVT # Lo [27] HANGUL SYLLABLE RYUG..HANGUL SYLLABLE RYUH
B975..B98F ; LVT # Lo [27] HANGUL SYLLABLE REUG..HANGUL SYLLABLE REUH
B991..B9AB ; LVT # Lo [27] HANGUL SYLLABLE RYIG..HANGUL SYLLABLE RYIH
B9AD..B9C7 ; LVT # Lo [27] HANGUL SYLLABLE RIG..HANGUL SYLLABLE RIH
B9C9..B9E3 ; LVT # Lo [27] HANGUL SYLLABLE MAG..HANGUL SYLLABLE MAH
B9E5..B9FF ; LVT # Lo [27] HANGUL SYLLABLE MAEG..HANGUL SYLLABLE MAEH
BA01..BA1B ; LVT # Lo [27] HANGUL SYLLABLE MYAG..HANGUL SYLLABLE MYAH
BA1D..BA37 ; LVT # Lo [27] HANGUL SYLLABLE MYAEG..HANGUL SYLLABLE MYAEH
BA39..BA53 ; LVT # Lo [27] HANGUL SYLLABLE MEOG..HANGUL SYLLABLE MEOH
BA55..BA6F ; LVT # Lo [27] HANGUL SYLLABLE MEG..HANGUL SYLLABLE MEH
BA71..BA8B ; LVT # Lo [27] HANGUL SYLLABLE MYEOG..HANGUL SYLLABLE MYEOH
BA8D..BAA7 ; LVT # Lo [27] HANGUL SYLLABLE MYEG..HANGUL SYLLABLE MYEH
BAA9..BAC3 ; LVT # Lo [27] HANGUL SYLLABLE MOG..HANGUL SYLLABLE MOH
BAC5..BADF ; LVT # Lo [27] HANGUL SYLLABLE MWAG..HANGUL SYLLABLE MWAH
BAE1..BAFB ; LVT # Lo [27] HANGUL SYLLABLE MWAEG..HANGUL SYLLABLE MWAEH
BAFD..BB17 ; LVT # Lo [27] HANGUL SYLLABLE MOEG..HANGUL SYLLABLE MOEH
BB19..BB33 ; LVT # Lo [27] HANGUL SYLLABLE MYOG..HANGUL SYLLABLE MYOH
BB35..BB4F ; LVT # Lo [27] HANGUL SYLLABLE MUG..HANGUL SYLLABLE MUH
BB51..BB6B ; LVT # Lo [27] HANGUL SYLLABLE MWEOG..HANGUL SYLLABLE MWEOH
BB6D..BB87 ; LVT # Lo [27] HANGUL SYLLABLE MWEG..HANGUL SYLLABLE MWEH
BB89..BBA3 ; LVT # Lo [27] HANGUL SYLLABLE MWIG..HANGUL SYLLABLE MWIH
BBA5..BBBF ; LVT # Lo [27] HANGUL SYLLABLE MYUG..HANGUL SYLLABLE MYUH
BBC1..BBDB ; LVT # Lo [27] HANGUL SYLLABLE MEUG..HANGUL SYLLABLE MEUH
BBDD..BBF7 ; LVT # Lo [27] HANGUL SYLLABLE MYIG..HANGUL SYLLABLE MYIH
BBF9..BC13 ; LVT # Lo [27] HANGUL SYLLABLE MIG..HANGUL SYLLABLE MIH
BC15..BC2F ; LVT # Lo [27] HANGUL SYLLABLE BAG..HANGUL SYLLABLE BAH
BC31..BC4B ; LVT # Lo [27] HANGUL SYLLABLE BAEG..HANGUL SYLLABLE BAEH
BC4D..BC67 ; LVT # Lo [27] HANGUL SYLLABLE BYAG..HANGUL SYLLABLE BYAH
BC69..BC83 ; LVT # Lo [27] HANGUL SYLLABLE BYAEG..HANGUL SYLLABLE BYAEH
BC85..BC9F ; LVT # Lo [27] HANGUL SYLLABLE BEOG..HANGUL SYLLABLE BEOH
BCA1..BCBB ; LVT # Lo [27] HANGUL SYLLABLE BEG..HANGUL SYLLABLE BEH
BCBD..BCD7 ; LVT # Lo [27] HANGUL SYLLABLE BYEOG..HANGUL SYLLABLE BYEOH
BCD9..BCF3 ; LVT # Lo [27] HANGUL SYLLABLE BYEG..HANGUL SYLLABLE BYEH
BCF5..BD0F ; LVT # Lo [27] HANGUL SYLLABLE BOG..HANGUL SYLLABLE BOH
BD11..BD2B ; LVT # Lo [27] HANGUL SYLLABLE BWAG..HANGUL SYLLABLE BWAH
BD2D..BD47 ; LVT # Lo [27] HANGUL SYLLABLE BWAEG..HANGUL SYLLABLE BWAEH
BD49..BD63 ; LVT # Lo [27] HANGUL SYLLABLE BOEG..HANGUL SYLLABLE BOEH
BD65..BD7F ; LVT # Lo [27] HANGUL SYLLABLE BYOG..HANGUL SYLLABLE BYOH
BD81..BD9B ; LVT # Lo [27] HANGUL SYLLABLE BUG..HANGUL SYLLABLE BUH
BD9D..BDB7 ; LVT # Lo [27] HANGUL SYLLABLE BWEOG..HANGUL SYLLABLE BWEOH
BDB9..BDD3 ; LVT # Lo [27] HANGUL SYLLABLE BWEG..HANGUL SYLLABLE BWEH
BDD5..BDEF ; LVT # Lo [27] HANGUL SYLLABLE BWIG..HANGUL SYLLABLE BWIH
BDF1..BE0B ; LVT # Lo [27] HANGUL SYLLABLE BYUG..HANGUL SYLLABLE BYUH
BE0D..BE27 ; LVT # Lo [27] HANGUL SYLLABLE BEUG..HANGUL SYLLABLE BEUH
BE29..BE43 ; LVT # Lo [27] HANGUL SYLLABLE BYIG..HANGUL SYLLABLE BYIH
BE45..BE5F ; LVT # Lo [27] HANGUL SYLLABLE BIG..HANGUL SYLLABLE BIH
BE61..BE7B ; LVT # Lo [27] HANGUL SYLLABLE BBAG..HANGUL SYLLABLE BBAH
BE7D..BE97 ; LVT # Lo [27] HANGUL SYLLABLE BBAEG..HANGUL SYLLABLE BBAEH
BE99..BEB3 ; LVT # Lo [27] HANGUL SYLLABLE BBYAG..HANGUL SYLLABLE BBYAH
BEB5..BECF ; LVT # Lo [27] HANGUL SYLLABLE BBYAEG..HANGUL SYLLABLE BBYAEH
BED1..BEEB ; LVT # Lo [27] HANGUL SYLLABLE BBEOG..HANGUL SYLLABLE BBEOH
BEED..BF07 ; LVT # Lo [27] HANGUL SYLLABLE BBEG..HANGUL SYLLABLE BBEH
BF09..BF23 ; LVT # Lo [27] HANGUL SYLLABLE BBYEOG..HANGUL SYLLABLE BBYEOH
BF25..BF3F ; LVT # Lo [27] HANGUL SYLLABLE BBYEG..HANGUL SYLLABLE BBYEH
BF41..BF5B ; LVT # Lo [27] HANGUL SYLLABLE BBOG..HANGUL SYLLABLE BBOH
BF5D..BF77 ; LVT # Lo [27] HANGUL SYLLABLE BBWAG..HANGUL SYLLABLE BBWAH
BF79..BF93 ; LVT # Lo [27] HANGUL SYLLABLE BBWAEG..HANGUL SYLLABLE BBWAEH
BF95..BFAF ; LVT # Lo [27] HANGUL SYLLABLE BBOEG..HANGUL SYLLABLE BBOEH
BFB1..BFCB ; LVT # Lo [27] HANGUL SYLLABLE BBYOG..HANGUL SYLLABLE BBYOH
BFCD..BFE7 ; LVT # Lo [27] HANGUL SYLLABLE BBUG..HANGUL SYLLABLE BBUH
BFE9..C003 ; LVT # Lo [27] HANGUL SYLLABLE BBWEOG..HANGUL SYLLABLE BBWEOH
C005..C01F ; LVT # Lo [27] HANGUL SYLLABLE BBWEG..HANGUL SYLLABLE BBWEH
C021..C03B ; LVT # Lo [27] HANGUL SYLLABLE BBWIG..HANGUL SYLLABLE BBWIH
C03D..C057 ; LVT # Lo [27] HANGUL SYLLABLE BBYUG..HANGUL SYLLABLE BBYUH
C059..C073 ; LVT # Lo [27] HANGUL SYLLABLE BBEUG..HANGUL SYLLABLE BBEUH
C075..C08F ; LVT # Lo [27] HANGUL SYLLABLE BBYIG..HANGUL SYLLABLE BBYIH
C091..C0AB ; LVT # Lo [27] HANGUL SYLLABLE BBIG..HANGUL SYLLABLE BBIH
C0AD..C0C7 ; LVT # Lo [27] HANGUL SYLLABLE SAG..HANGUL SYLLABLE SAH
C0C9..C0E3 ; LVT # Lo [27] HANGUL SYLLABLE SAEG..HANGUL SYLLABLE SAEH
C0E5..C0FF ; LVT # Lo [27] HANGUL SYLLABLE SYAG..HANGUL SYLLABLE SYAH
C101..C11B ; LVT # Lo [27] HANGUL SYLLABLE SYAEG..HANGUL SYLLABLE SYAEH
C11D..C137 ; LVT # Lo [27] HANGUL SYLLABLE SEOG..HANGUL SYLLABLE SEOH
C139..C153 ; LVT # Lo [27] HANGUL SYLLABLE SEG..HANGUL SYLLABLE SEH
C155..C16F ; LVT # Lo [27] HANGUL SYLLABLE SYEOG..HANGUL SYLLABLE SYEOH
C171..C18B ; LVT # Lo [27] HANGUL SYLLABLE SYEG..HANGUL SYLLABLE SYEH
C18D..C1A7 ; LVT # Lo [27] HANGUL SYLLABLE SOG..HANGUL SYLLABLE SOH
C1A9..C1C3 ; LVT # Lo [27] HANGUL SYLLABLE SWAG..HANGUL SYLLABLE SWAH
C1C5..C1DF ; LVT # Lo [27] HANGUL SYLLABLE SWAEG..HANGUL SYLLABLE SWAEH
C1E1..C1FB ; LVT # Lo [27] HANGUL SYLLABLE SOEG..HANGUL SYLLABLE SOEH
C1FD..C217 ; LVT # Lo [27] HANGUL SYLLABLE SYOG..HANGUL SYLLABLE SYOH
C219..C233 ; LVT # Lo [27] HANGUL SYLLABLE SUG..HANGUL SYLLABLE SUH
C235..C24F ; LVT # Lo [27] HANGUL SYLLABLE SWEOG..HANGUL SYLLABLE SWEOH
C251..C26B ; LVT # Lo [27] HANGUL SYLLABLE SWEG..HANGUL SYLLABLE SWEH
C26D..C287 ; LVT # Lo [27] HANGUL SYLLABLE SWIG..HANGUL SYLLABLE SWIH
C289..C2A3 ; LVT # Lo [27] HANGUL SYLLABLE SYUG..HANGUL SYLLABLE SYUH
C2A5..C2BF ; LVT # Lo [27] HANGUL SYLLABLE SEUG..HANGUL SYLLABLE SEUH
C2C1..C2DB ; LVT # Lo [27] HANGUL SYLLABLE SYIG..HANGUL SYLLABLE SYIH
C2DD..C2F7 ; LVT # Lo [27] HANGUL SYLLABLE SIG..HANGUL SYLLABLE SIH
C2F9..C313 ; LVT # Lo [27] HANGUL SYLLABLE SSAG..HANGUL SYLLABLE SSAH
C315..C32F ; LVT # Lo [27] HANGUL SYLLABLE SSAEG..HANGUL SYLLABLE SSAEH
C331..C34B ; LVT # Lo [27] HANGUL SYLLABLE SSYAG..HANGUL SYLLABLE SSYAH
C34D..C367 ; LVT # Lo [27] HANGUL SYLLABLE SSYAEG..HANGUL SYLLABLE SSYAEH
C369..C383 ; LVT # Lo [27] HANGUL SYLLABLE SSEOG..HANGUL SYLLABLE SSEOH
C385..C39F ; LVT # Lo [27] HANGUL SYLLABLE SSEG..HANGUL SYLLABLE SSEH
C3A1..C3BB ; LVT # Lo [27] HANGUL SYLLABLE SSYEOG..HANGUL SYLLABLE SSYEOH
C3BD..C3D7 ; LVT # Lo [27] HANGUL SYLLABLE SSYEG..HANGUL SYLLABLE SSYEH
C3D9..C3F3 ; LVT # Lo [27] HANGUL SYLLABLE SSOG..HANGUL SYLLABLE SSOH
C3F5..C40F ; LVT # Lo [27] HANGUL SYLLABLE SSWAG..HANGUL SYLLABLE SSWAH
C411..C42B ; LVT # Lo [27] HANGUL SYLLABLE SSWAEG..HANGUL SYLLABLE SSWAEH
C42D..C447 ; LVT # Lo [27] HANGUL SYLLABLE SSOEG..HANGUL SYLLABLE SSOEH
C449..C463 ; LVT # Lo [27] HANGUL SYLLABLE SSYOG..HANGUL SYLLABLE SSYOH
C465..C47F ; LVT # Lo [27] HANGUL SYLLABLE SSUG..HANGUL SYLLABLE SSUH
C481..C49B ; LVT # Lo [27] HANGUL SYLLABLE SSWEOG..HANGUL SYLLABLE SSWEOH
C49D..C4B7 ; LVT # Lo [27] HANGUL SYLLABLE SSWEG..HANGUL SYLLABLE SSWEH
C4B9..C4D3 ; LVT # Lo [27] HANGUL SYLLABLE SSWIG..HANGUL SYLLABLE SSWIH
C4D5..C4EF ; LVT # Lo [27] HANGUL SYLLABLE SSYUG..HANGUL SYLLABLE SSYUH
C4F1..C50B ; LVT # Lo [27] HANGUL SYLLABLE SSEUG..HANGUL SYLLABLE SSEUH
C50D..C527 ; LVT # Lo [27] HANGUL SYLLABLE SSYIG..HANGUL SYLLABLE SSYIH
C529..C543 ; LVT # Lo [27] HANGUL SYLLABLE SSIG..HANGUL SYLLABLE SSIH
C545..C55F ; LVT # Lo [27] HANGUL SYLLABLE AG..HANGUL SYLLABLE AH
C561..C57B ; LVT # Lo [27] HANGUL SYLLABLE AEG..HANGUL SYLLABLE AEH
C57D..C597 ; LVT # Lo [27] HANGUL SYLLABLE YAG..HANGUL SYLLABLE YAH
C599..C5B3 ; LVT # Lo [27] HANGUL SYLLABLE YAEG..HANGUL SYLLABLE YAEH
C5B5..C5CF ; LVT # Lo [27] HANGUL SYLLABLE EOG..HANGUL SYLLABLE EOH
C5D1..C5EB ; LVT # Lo [27] HANGUL SYLLABLE EG..HANGUL SYLLABLE EH
C5ED..C607 ; LVT # Lo [27] HANGUL SYLLABLE YEOG..HANGUL SYLLABLE YEOH
C609..C623 ; LVT # Lo [27] HANGUL SYLLABLE YEG..HANGUL SYLLABLE YEH
C625..C63F ; LVT # Lo [27] HANGUL SYLLABLE OG..HANGUL SYLLABLE OH
C641..C65B ; LVT # Lo [27] HANGUL SYLLABLE WAG..HANGUL SYLLABLE WAH
C65D..C677 ; LVT # Lo [27] HANGUL SYLLABLE WAEG..HANGUL SYLLABLE WAEH
C679..C693 ; LVT # Lo [27] HANGUL SYLLABLE OEG..HANGUL SYLLABLE OEH
C695..C6AF ; LVT # Lo [27] HANGUL SYLLABLE YOG..HANGUL SYLLABLE YOH
C6B1..C6CB ; LVT # Lo [27] HANGUL SYLLABLE UG..HANGUL SYLLABLE UH
C6CD..C6E7 ; LVT # Lo [27] HANGUL SYLLABLE WEOG..HANGUL SYLLABLE WEOH
C6E9..C703 ; LVT # Lo [27] HANGUL SYLLABLE WEG..HANGUL SYLLABLE WEH
C705..C71F ; LVT # Lo [27] HANGUL SYLLABLE WIG..HANGUL SYLLABLE WIH
C721..C73B ; LVT # Lo [27] HANGUL SYLLABLE YUG..HANGUL SYLLABLE YUH
C73D..C757 ; LVT # Lo [27] HANGUL SYLLABLE EUG..HANGUL SYLLABLE EUH
C759..C773 ; LVT # Lo [27] HANGUL SYLLABLE YIG..HANGUL SYLLABLE YIH
C775..C78F ; LVT # Lo [27] HANGUL SYLLABLE IG..HANGUL SYLLABLE IH
C791..C7AB ; LVT # Lo [27] HANGUL SYLLABLE JAG..HANGUL SYLLABLE JAH
C7AD..C7C7 ; LVT # Lo [27] HANGUL SYLLABLE JAEG..HANGUL SYLLABLE JAEH
C7C9..C7E3 ; LVT # Lo [27] HANGUL SYLLABLE JYAG..HANGUL SYLLABLE JYAH
C7E5..C7FF ; LVT # Lo [27] HANGUL SYLLABLE JYAEG..HANGUL SYLLABLE JYAEH
C801..C81B ; LVT # Lo [27] HANGUL SYLLABLE JEOG..HANGUL SYLLABLE JEOH
C81D..C837 ; LVT # Lo [27] HANGUL SYLLABLE JEG..HANGUL SYLLABLE JEH
C839..C853 ; LVT # Lo [27] HANGUL SYLLABLE JYEOG..HANGUL SYLLABLE JYEOH
C855..C86F ; LVT # Lo [27] HANGUL SYLLABLE JYEG..HANGUL SYLLABLE JYEH
C871..C88B ; LVT # Lo [27] HANGUL SYLLABLE JOG..HANGUL SYLLABLE JOH
C88D..C8A7 ; LVT # Lo [27] HANGUL SYLLABLE JWAG..HANGUL SYLLABLE JWAH
C8A9..C8C3 ; LVT # Lo [27] HANGUL SYLLABLE JWAEG..HANGUL SYLLABLE JWAEH
C8C5..C8DF ; LVT # Lo [27] HANGUL SYLLABLE JOEG..HANGUL SYLLABLE JOEH
C8E1..C8FB ; LVT # Lo [27] HANGUL SYLLABLE JYOG..HANGUL SYLLABLE JYOH
C8FD..C917 ; LVT # Lo [27] HANGUL SYLLABLE JUG..HANGUL SYLLABLE JUH
C919..C933 ; LVT # Lo [27] HANGUL SYLLABLE JWEOG..HANGUL SYLLABLE JWEOH
C935..C94F ; LVT # Lo [27] HANGUL SYLLABLE JWEG..HANGUL SYLLABLE JWEH
C951..C96B ; LVT # Lo [27] HANGUL SYLLABLE JWIG..HANGUL SYLLABLE JWIH
C96D..C987 ; LVT # Lo [27] HANGUL SYLLABLE JYUG..HANGUL SYLLABLE JYUH
C989..C9A3 ; LVT # Lo [27] HANGUL SYLLABLE JEUG..HANGUL SYLLABLE JEUH
C9A5..C9BF ; LVT # Lo [27] HANGUL SYLLABLE JYIG..HANGUL SYLLABLE JYIH
C9C1..C9DB ; LVT # Lo [27] HANGUL SYLLABLE JIG..HANGUL SYLLABLE JIH
C9DD..C9F7 ; LVT # Lo [27] HANGUL SYLLABLE JJAG..HANGUL SYLLABLE JJAH
C9F9..CA13 ; LVT # Lo [27] HANGUL SYLLABLE JJAEG..HANGUL SYLLABLE JJAEH
CA15..CA2F ; LVT # Lo [27] HANGUL SYLLABLE JJYAG..HANGUL SYLLABLE JJYAH
CA31..CA4B ; LVT # Lo [27] HANGUL SYLLABLE JJYAEG..HANGUL SYLLABLE JJYAEH
CA4D..CA67 ; LVT # Lo [27] HANGUL SYLLABLE JJEOG..HANGUL SYLLABLE JJEOH
CA69..CA83 ; LVT # Lo [27] HANGUL SYLLABLE JJEG..HANGUL SYLLABLE JJEH
CA85..CA9F ; LVT # Lo [27] HANGUL SYLLABLE JJYEOG..HANGUL SYLLABLE JJYEOH
CAA1..CABB ; LVT # Lo [27] HANGUL SYLLABLE JJYEG..HANGUL SYLLABLE JJYEH
CABD..CAD7 ; LVT # Lo [27] HANGUL SYLLABLE JJOG..HANGUL SYLLABLE JJOH
CAD9..CAF3 ; LVT # Lo [27] HANGUL SYLLABLE JJWAG..HANGUL SYLLABLE JJWAH
CAF5..CB0F ; LVT # Lo [27] HANGUL SYLLABLE JJWAEG..HANGUL SYLLABLE JJWAEH
CB11..CB2B ; LVT # Lo [27] HANGUL SYLLABLE JJOEG..HANGUL SYLLABLE JJOEH
CB2D..CB47 ; LVT # Lo [27] HANGUL SYLLABLE JJYOG..HANGUL SYLLABLE JJYOH
CB49..CB63 ; LVT # Lo [27] HANGUL SYLLABLE JJUG..HANGUL SYLLABLE JJUH
CB65..CB7F ; LVT # Lo [27] HANGUL SYLLABLE JJWEOG..HANGUL SYLLABLE JJWEOH
CB81..CB9B ; LVT # Lo [27] HANGUL SYLLABLE JJWEG..HANGUL SYLLABLE JJWEH
CB9D..CBB7 ; LVT # Lo [27] HANGUL SYLLABLE JJWIG..HANGUL SYLLABLE JJWIH
CBB9..CBD3 ; LVT # Lo [27] HANGUL SYLLABLE JJYUG..HANGUL SYLLABLE JJYUH
CBD5..CBEF ; LVT # Lo [27] HANGUL SYLLABLE JJEUG..HANGUL SYLLABLE JJEUH
CBF1..CC0B ; LVT # Lo [27] HANGUL SYLLABLE JJYIG..HANGUL SYLLABLE JJYIH
CC0D..CC27 ; LVT # Lo [27] HANGUL SYLLABLE JJIG..HANGUL SYLLABLE JJIH
CC29..CC43 ; LVT # Lo [27] HANGUL SYLLABLE CAG..HANGUL SYLLABLE CAH
CC45..CC5F ; LVT # Lo [27] HANGUL SYLLABLE CAEG..HANGUL SYLLABLE CAEH
CC61..CC7B ; LVT # Lo [27] HANGUL SYLLABLE CYAG..HANGUL SYLLABLE CYAH
CC7D..CC97 ; LVT # Lo [27] HANGUL SYLLABLE CYAEG..HANGUL SYLLABLE CYAEH
CC99..CCB3 ; LVT # Lo [27] HANGUL SYLLABLE CEOG..HANGUL SYLLABLE CEOH
CCB5..CCCF ; LVT # Lo [27] HANGUL SYLLABLE CEG..HANGUL SYLLABLE CEH
CCD1..CCEB ; LVT # Lo [27] HANGUL SYLLABLE CYEOG..HANGUL SYLLABLE CYEOH
CCED..CD07 ; LVT # Lo [27] HANGUL SYLLABLE CYEG..HANGUL SYLLABLE CYEH
CD09..CD23 ; LVT # Lo [27] HANGUL SYLLABLE COG..HANGUL SYLLABLE COH
CD25..CD3F ; LVT # Lo [27] HANGUL SYLLABLE CWAG..HANGUL SYLLABLE CWAH
CD41..CD5B ; LVT # Lo [27] HANGUL SYLLABLE CWAEG..HANGUL SYLLABLE CWAEH
CD5D..CD77 ; LVT # Lo [27] HANGUL SYLLABLE COEG..HANGUL SYLLABLE COEH
CD79..CD93 ; LVT # Lo [27] HANGUL SYLLABLE CYOG..HANGUL SYLLABLE CYOH
CD95..CDAF ; LVT # Lo [27] HANGUL SYLLABLE CUG..HANGUL SYLLABLE CUH
CDB1..CDCB ; LVT # Lo [27] HANGUL SYLLABLE CWEOG..HANGUL SYLLABLE CWEOH
CDCD..CDE7 ; LVT # Lo [27] HANGUL SYLLABLE CWEG..HANGUL SYLLABLE CWEH
CDE9..CE03 ; LVT # Lo [27] HANGUL SYLLABLE CWIG..HANGUL SYLLABLE CWIH
CE05..CE1F ; LVT # Lo [27] HANGUL SYLLABLE CYUG..HANGUL SYLLABLE CYUH
CE21..CE3B ; LVT # Lo [27] HANGUL SYLLABLE CEUG..HANGUL SYLLABLE CEUH
CE3D..CE57 ; LVT # Lo [27] HANGUL SYLLABLE CYIG..HANGUL SYLLABLE CYIH
CE59..CE73 ; LVT # Lo [27] HANGUL SYLLABLE CIG..HANGUL SYLLABLE CIH
CE75..CE8F ; LVT # Lo [27] HANGUL SYLLABLE KAG..HANGUL SYLLABLE KAH
CE91..CEAB ; LVT # Lo [27] HANGUL SYLLABLE KAEG..HANGUL SYLLABLE KAEH
CEAD..CEC7 ; LVT # Lo [27] HANGUL SYLLABLE KYAG..HANGUL SYLLABLE KYAH
CEC9..CEE3 ; LVT # Lo [27] HANGUL SYLLABLE KYAEG..HANGUL SYLLABLE KYAEH
CEE5..CEFF ; LVT # Lo [27] HANGUL SYLLABLE KEOG..HANGUL SYLLABLE KEOH
CF01..CF1B ; LVT # Lo [27] HANGUL SYLLABLE KEG..HANGUL SYLLABLE KEH
CF1D..CF37 ; LVT # Lo [27] HANGUL SYLLABLE KYEOG..HANGUL SYLLABLE KYEOH
CF39..CF53 ; LVT # Lo [27] HANGUL SYLLABLE KYEG..HANGUL SYLLABLE KYEH
CF55..CF6F ; LVT # Lo [27] HANGUL SYLLABLE KOG..HANGUL SYLLABLE KOH
CF71..CF8B ; LVT # Lo [27] HANGUL SYLLABLE KWAG..HANGUL SYLLABLE KWAH
CF8D..CFA7 ; LVT # Lo [27] HANGUL SYLLABLE KWAEG..HANGUL SYLLABLE KWAEH
CFA9..CFC3 ; LVT # Lo [27] HANGUL SYLLABLE KOEG..HANGUL SYLLABLE KOEH
CFC5..CFDF ; LVT # Lo [27] HANGUL SYLLABLE KYOG..HANGUL SYLLABLE KYOH
CFE1..CFFB ; LVT # Lo [27] HANGUL SYLLABLE KUG..HANGUL SYLLABLE KUH
CFFD..D017 ; LVT # Lo [27] HANGUL SYLLABLE KWEOG..HANGUL SYLLABLE KWEOH
D019..D033 ; LVT # Lo [27] HANGUL SYLLABLE KWEG..HANGUL SYLLABLE KWEH
D035..D04F ; LVT # Lo [27] HANGUL SYLLABLE KWIG..HANGUL SYLLABLE KWIH
D051..D06B ; LVT # Lo [27] HANGUL SYLLABLE KYUG..HANGUL SYLLABLE KYUH
D06D..D087 ; LVT # Lo [27] HANGUL SYLLABLE KEUG..HANGUL SYLLABLE KEUH
D089..D0A3 ; LVT # Lo [27] HANGUL SYLLABLE KYIG..HANGUL SYLLABLE KYIH
D0A5..D0BF ; LVT # Lo [27] HANGUL SYLLABLE KIG..HANGUL SYLLABLE KIH
D0C1..D0DB ; LVT # Lo [27] HANGUL SYLLABLE TAG..HANGUL SYLLABLE TAH
D0DD..D0F7 ; LVT # Lo [27] HANGUL SYLLABLE TAEG..HANGUL SYLLABLE TAEH
D0F9..D113 ; LVT # Lo [27] HANGUL SYLLABLE TYAG..HANGUL SYLLABLE TYAH
D115..D12F ; LVT # Lo [27] HANGUL SYLLABLE TYAEG..HANGUL SYLLABLE TYAEH
D131..D14B ; LVT # Lo [27] HANGUL SYLLABLE TEOG..HANGUL SYLLABLE TEOH
D14D..D167 ; LVT # Lo [27] HANGUL SYLLABLE TEG..HANGUL SYLLABLE TEH
D169..D183 ; LVT # Lo [27] HANGUL SYLLABLE TYEOG..HANGUL SYLLABLE TYEOH
D185..D19F ; LVT # Lo [27] HANGUL SYLLABLE TYEG..HANGUL SYLLABLE TYEH
D1A1..D1BB ; LVT # Lo [27] HANGUL SYLLABLE TOG..HANGUL SYLLABLE TOH
D1BD..D1D7 ; LVT # Lo [27] HANGUL SYLLABLE TWAG..HANGUL SYLLABLE TWAH
D1D9..D1F3 ; LVT # Lo [27] HANGUL SYLLABLE TWAEG..HANGUL SYLLABLE TWAEH
D1F5..D20F ; LVT # Lo [27] HANGUL SYLLABLE TOEG..HANGUL SYLLABLE TOEH
D211..D22B ; LVT # Lo [27] HANGUL SYLLABLE TYOG..HANGUL SYLLABLE TYOH
D22D..D247 ; LVT # Lo [27] HANGUL SYLLABLE TUG..HANGUL SYLLABLE TUH
D249..D263 ; LVT # Lo [27] HANGUL SYLLABLE TWEOG..HANGUL SYLLABLE TWEOH
D265..D27F ; LVT # Lo [27] HANGUL SYLLABLE TWEG..HANGUL SYLLABLE TWEH
D281..D29B ; LVT # Lo [27] HANGUL SYLLABLE TWIG..HANGUL SYLLABLE TWIH
D29D..D2B7 ; LVT # Lo [27] HANGUL SYLLABLE TYUG..HANGUL SYLLABLE TYUH
D2B9..D2D3 ; LVT # Lo [27] HANGUL SYLLABLE TEUG..HANGUL SYLLABLE TEUH
D2D5..D2EF ; LVT # Lo [27] HANGUL SYLLABLE TYIG..HANGUL SYLLABLE TYIH
D2F1..D30B ; LVT # Lo [27] HANGUL SYLLABLE TIG..HANGUL SYLLABLE TIH
D30D..D327 ; LVT # Lo [27] HANGUL SYLLABLE PAG..HANGUL SYLLABLE PAH
D329..D343 ; LVT # Lo [27] HANGUL SYLLABLE PAEG..HANGUL SYLLABLE PAEH
D345..D35F ; LVT # Lo [27] HANGUL SYLLABLE PYAG..HANGUL SYLLABLE PYAH
D361..D37B ; LVT # Lo [27] HANGUL SYLLABLE PYAEG..HANGUL SYLLABLE PYAEH
D37D..D397 ; LVT # Lo [27] HANGUL SYLLABLE PEOG..HANGUL SYLLABLE PEOH
D399..D3B3 ; LVT # Lo [27] HANGUL SYLLABLE PEG..HANGUL SYLLABLE PEH
D3B5..D3CF ; LVT # Lo [27] HANGUL SYLLABLE PYEOG..HANGUL SYLLABLE PYEOH
D3D1..D3EB ; LVT # Lo [27] HANGUL SYLLABLE PYEG..HANGUL SYLLABLE PYEH
D3ED..D407 ; LVT # Lo [27] HANGUL SYLLABLE POG..HANGUL SYLLABLE POH
D409..D423 ; LVT # Lo [27] HANGUL SYLLABLE PWAG..HANGUL SYLLABLE PWAH
D425..D43F ; LVT # Lo [27] HANGUL SYLLABLE PWAEG..HANGUL SYLLABLE PWAEH
D441..D45B ; LVT # Lo [27] HANGUL SYLLABLE POEG..HANGUL SYLLABLE POEH
D45D..D477 ; LVT # Lo [27] HANGUL SYLLABLE PYOG..HANGUL SYLLABLE PYOH
D479..D493 ; LVT # Lo [27] HANGUL SYLLABLE PUG..HANGUL SYLLABLE PUH
D495..D4AF ; LVT # Lo [27] HANGUL SYLLABLE PWEOG..HANGUL SYLLABLE PWEOH
D4B1..D4CB ; LVT # Lo [27] HANGUL SYLLABLE PWEG..HANGUL SYLLABLE PWEH
D4CD..D4E7 ; LVT # Lo [27] HANGUL SYLLABLE PWIG..HANGUL SYLLABLE PWIH
D4E9..D503 ; LVT # Lo [27] HANGUL SYLLABLE PYUG..HANGUL SYLLABLE PYUH
D505..D51F ; LVT # Lo [27] HANGUL SYLLABLE PEUG..HANGUL SYLLABLE PEUH
D521..D53B ; LVT # Lo [27] HANGUL SYLLABLE PYIG..HANGUL SYLLABLE PYIH
D53D..D557 ; LVT # Lo [27] HANGUL SYLLABLE PIG..HANGUL SYLLABLE PIH
D559..D573 ; LVT # Lo [27] HANGUL SYLLABLE HAG..HANGUL SYLLABLE HAH
D575..D58F ; LVT # Lo [27] HANGUL SYLLABLE HAEG..HANGUL SYLLABLE HAEH
D591..D5AB ; LVT # Lo [27] HANGUL SYLLABLE HYAG..HANGUL SYLLABLE HYAH
D5AD..D5C7 ; LVT # Lo [27] HANGUL SYLLABLE HYAEG..HANGUL SYLLABLE HYAEH
D5C9..D5E3 ; LVT # Lo [27] HANGUL SYLLABLE HEOG..HANGUL SYLLABLE HEOH
D5E5..D5FF ; LVT # Lo [27] HANGUL SYLLABLE HEG..HANGUL SYLLABLE HEH
D601..D61B ; LVT # Lo [27] HANGUL SYLLABLE HYEOG..HANGUL SYLLABLE HYEOH
D61D..D637 ; LVT # Lo [27] HANGUL SYLLABLE HYEG..HANGUL SYLLABLE HYEH
D639..D653 ; LVT # Lo [27] HANGUL SYLLABLE HOG..HANGUL SYLLABLE HOH
D655..D66F ; LVT # Lo [27] HANGUL SYLLABLE HWAG..HANGUL SYLLABLE HWAH
D671..D68B ; LVT # Lo [27] HANGUL SYLLABLE HWAEG..HANGUL SYLLABLE HWAEH
D68D..D6A7 ; LVT # Lo [27] HANGUL SYLLABLE HOEG..HANGUL SYLLABLE HOEH
D6A9..D6C3 ; LVT # Lo [27] HANGUL SYLLABLE HYOG..HANGUL SYLLABLE HYOH
D6C5..D6DF ; LVT # Lo [27] HANGUL SYLLABLE HUG..HANGUL SYLLABLE HUH
D6E1..D6FB ; LVT # Lo [27] HANGUL SYLLABLE HWEOG..HANGUL SYLLABLE HWEOH
D6FD..D717 ; LVT # Lo [27] HANGUL SYLLABLE HWEG..HANGUL SYLLABLE HWEH
D719..D733 ; LVT # Lo [27] HANGUL SYLLABLE HWIG..HANGUL SYLLABLE HWIH
D735..D74F ; LVT # Lo [27] HANGUL SYLLABLE HYUG..HANGUL SYLLABLE HYUH
D751..D76B ; LVT # Lo [27] HANGUL SYLLABLE HEUG..HANGUL SYLLABLE HEUH
D76D..D787 ; LVT # Lo [27] HANGUL SYLLABLE HYIG..HANGUL SYLLABLE HYIH
D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH
# Total code points: 10773
# EOF

19129
deps/zg/data/unicode/NormalizationTest.txt vendored Normal file

File diff suppressed because it is too large Load Diff

1827
deps/zg/data/unicode/PropList.txt vendored Normal file

File diff suppressed because it is too large Load Diff

3033
deps/zg/data/unicode/Scripts.txt vendored Normal file

File diff suppressed because it is too large Load Diff

34931
deps/zg/data/unicode/UnicodeData.txt vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1320
deps/zg/data/unicode/emoji/emoji-data.txt vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,291 @@
# DerivedNumericType-15.1.0.txt
# Date: 2023-01-05, 20:34:41 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
# ================================================
# Derived Property: Numeric_Type
# The values are based on fields 6-8 of UnicodeData.txt, plus the fields
# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan).
# The derivations for these values are as follows.
# Numeric_Type=Decimal: When there is a value in field 6.
# Numeric_Type=Digit: When there is a value in field 7, but not in field 6.
# Numeric_Type=Numeric: When there are values for kAccountingNumeric, kOtherNumeric, kPrimaryNumeric,
# or there is a value in field 8, but not in field 7.
# Numeric_Type=None: Otherwise
# All code points not explicitly listed for Numeric_Type
# have the value None.
# @missing: 0000..10FFFF; None
# ================================================
00BC..00BE ; Numeric # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
09F4..09F9 ; Numeric # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
0B72..0B77 ; Numeric # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
0BF0..0BF2 ; Numeric # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0C78..0C7E ; Numeric # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0D58..0D5E ; Numeric # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
0D70..0D78 ; Numeric # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
0F2A..0F33 ; Numeric # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
1372..137C ; Numeric # No [11] ETHIOPIC NUMBER TEN..ETHIOPIC NUMBER TEN THOUSAND
16EE..16F0 ; Numeric # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
17F0..17F9 ; Numeric # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
2150..215F ; Numeric # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2160..2182 ; Numeric # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2185..2188 ; Numeric # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
2189 ; Numeric # No VULGAR FRACTION ZERO THIRDS
2469..2473 ; Numeric # No [11] CIRCLED NUMBER TEN..CIRCLED NUMBER TWENTY
247D..2487 ; Numeric # No [11] PARENTHESIZED NUMBER TEN..PARENTHESIZED NUMBER TWENTY
2491..249B ; Numeric # No [11] NUMBER TEN FULL STOP..NUMBER TWENTY FULL STOP
24EB..24F4 ; Numeric # No [10] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED NUMBER TWENTY
24FE ; Numeric # No DOUBLE CIRCLED NUMBER TEN
277F ; Numeric # No DINGBAT NEGATIVE CIRCLED NUMBER TEN
2789 ; Numeric # No DINGBAT CIRCLED SANS-SERIF NUMBER TEN
2793 ; Numeric # No DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2CFD ; Numeric # No COPTIC FRACTION ONE HALF
3007 ; Numeric # Nl IDEOGRAPHIC NUMBER ZERO
3021..3029 ; Numeric # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Numeric # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3192..3195 ; Numeric # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3220..3229 ; Numeric # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
3248..324F ; Numeric # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
3251..325F ; Numeric # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
3280..3289 ; Numeric # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
32B1..32BF ; Numeric # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
3405 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3405
3483 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3483
382A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-382A
3B4D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3B4D
4E00 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E00
4E03 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E03
4E07 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E07
4E09 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E09
4E24 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E24
4E5D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E5D
4E8C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E8C
4E94 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E94
4E96 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E96
4EAC ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EAC
4EBF..4EC0 ; Numeric # Lo [2] CJK UNIFIED IDEOGRAPH-4EBF..CJK UNIFIED IDEOGRAPH-4EC0
4EDF ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EDF
4EE8 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EE8
4F0D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4F0D
4F70 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4F70
4FE9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4FE9
5006 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5006
5104 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5104
5146 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5146
5169 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5169
516B ; Numeric # Lo CJK UNIFIED IDEOGRAPH-516B
516D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-516D
5341 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5341
5343..5345 ; Numeric # Lo [3] CJK UNIFIED IDEOGRAPH-5343..CJK UNIFIED IDEOGRAPH-5345
534C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-534C
53C1..53C4 ; Numeric # Lo [4] CJK UNIFIED IDEOGRAPH-53C1..CJK UNIFIED IDEOGRAPH-53C4
56DB ; Numeric # Lo CJK UNIFIED IDEOGRAPH-56DB
58F1 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-58F1
58F9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-58F9
5E7A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5E7A
5EFE..5EFF ; Numeric # Lo [2] CJK UNIFIED IDEOGRAPH-5EFE..CJK UNIFIED IDEOGRAPH-5EFF
5F0C..5F0E ; Numeric # Lo [3] CJK UNIFIED IDEOGRAPH-5F0C..CJK UNIFIED IDEOGRAPH-5F0E
5F10 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5F10
62D0 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-62D0
62FE ; Numeric # Lo CJK UNIFIED IDEOGRAPH-62FE
634C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-634C
67D2 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-67D2
6D1E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-6D1E
6F06 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-6F06
7396 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-7396
767E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-767E
7695 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-7695
79ED ; Numeric # Lo CJK UNIFIED IDEOGRAPH-79ED
8086 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8086
842C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-842C
8CAE ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8CAE
8CB3 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8CB3
8D30 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8D30
920E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-920E
94A9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-94A9
9621 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9621
9646 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9646
964C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-964C
9678 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9678
96F6 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-96F6
A6E6..A6EF ; Numeric # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A830..A835 ; Numeric # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
F96B ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F96B
F973 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F973
F978 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F978
F9B2 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9B2
F9D1 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9D1
F9D3 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9D3
F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
10107..10133 ; Numeric # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10140..10174 ; Numeric # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Numeric # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
1018A..1018B ; Numeric # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
102E1..102FB ; Numeric # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
10320..10323 ; Numeric # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
10341 ; Numeric # Nl GOTHIC LETTER NINETY
1034A ; Numeric # Nl GOTHIC LETTER NINE HUNDRED
103D1..103D5 ; Numeric # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
10858..1085F ; Numeric # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
10879..1087F ; Numeric # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
108A7..108AF ; Numeric # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
108FB..108FF ; Numeric # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED
10916..1091B ; Numeric # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
109BC..109BD ; Numeric # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF
109C0..109CF ; Numeric # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY
109D2..109FF ; Numeric # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS
10A44..10A48 ; Numeric # No [5] KHAROSHTHI NUMBER TEN..KHAROSHTHI FRACTION ONE HALF
10A7D..10A7E ; Numeric # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A9D..10A9F ; Numeric # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
10AEB..10AEF ; Numeric # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
10B58..10B5F ; Numeric # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
10B78..10B7F ; Numeric # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
10BA9..10BAF ; Numeric # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
10CFA..10CFF ; Numeric # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND
10E69..10E7E ; Numeric # No [22] RUMI NUMBER TEN..RUMI FRACTION TWO THIRDS
10F1D..10F26 ; Numeric # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F51..10F54 ; Numeric # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10FC5..10FCB ; Numeric # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
1105B..11065 ; Numeric # No [11] BRAHMI NUMBER TEN..BRAHMI NUMBER ONE THOUSAND
111E1..111F4 ; Numeric # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
1173A..1173B ; Numeric # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
118EA..118F2 ; Numeric # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
11C5A..11C6C ; Numeric # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
11FC0..11FD4 ; Numeric # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
12400..1246E ; Numeric # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
16B5B..16B61 ; Numeric # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
16E80..16E96 ; Numeric # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
1D2C0..1D2D3 ; Numeric # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
1D2E0..1D2F3 ; Numeric # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
1D360..1D378 ; Numeric # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
1E8C7..1E8CF ; Numeric # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
1EC71..1ECAB ; Numeric # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE
1ECAD..1ECAF ; Numeric # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS
1ECB1..1ECB4 ; Numeric # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK
1ED01..1ED2D ; Numeric # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND
1ED2F..1ED3D ; Numeric # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
1F10B..1F10C ; Numeric # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
20001 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20001
20064 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20064
200E2 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-200E2
20121 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20121
2092A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2092A
20983 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20983
2098C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2098C
2099C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2099C
20AEA ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20AEA
20AFD ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20AFD
20B19 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20B19
22390 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-22390
22998 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-22998
23B1B ; Numeric # Lo CJK UNIFIED IDEOGRAPH-23B1B
2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D
2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890
# Total code points: 1114
# ================================================
00B2..00B3 ; Digit # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B9 ; Digit # No SUPERSCRIPT ONE
1369..1371 ; Digit # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Digit # No NEW TAI LUE THAM DIGIT ONE
2070 ; Digit # No SUPERSCRIPT ZERO
2074..2079 ; Digit # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
2080..2089 ; Digit # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
2460..2468 ; Digit # No [9] CIRCLED DIGIT ONE..CIRCLED DIGIT NINE
2474..247C ; Digit # No [9] PARENTHESIZED DIGIT ONE..PARENTHESIZED DIGIT NINE
2488..2490 ; Digit # No [9] DIGIT ONE FULL STOP..DIGIT NINE FULL STOP
24EA ; Digit # No CIRCLED DIGIT ZERO
24F5..24FD ; Digit # No [9] DOUBLE CIRCLED DIGIT ONE..DOUBLE CIRCLED DIGIT NINE
24FF ; Digit # No NEGATIVE CIRCLED DIGIT ZERO
2776..277E ; Digit # No [9] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED DIGIT NINE
2780..2788 ; Digit # No [9] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT CIRCLED SANS-SERIF DIGIT NINE
278A..2792 ; Digit # No [9] DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
10A40..10A43 ; Digit # No [4] KHAROSHTHI DIGIT ONE..KHAROSHTHI DIGIT FOUR
10E60..10E68 ; Digit # No [9] RUMI DIGIT ONE..RUMI DIGIT NINE
11052..1105A ; Digit # No [9] BRAHMI NUMBER ONE..BRAHMI NUMBER NINE
1F100..1F10A ; Digit # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
# Total code points: 128
# ================================================
0030..0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE
0660..0669 ; Decimal # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 ; Decimal # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 ; Decimal # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F ; Decimal # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF ; Decimal # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F ; Decimal # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF ; Decimal # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F ; Decimal # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF ; Decimal # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F ; Decimal # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF ; Decimal # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F ; Decimal # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0DE6..0DEF ; Decimal # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
0E50..0E59 ; Decimal # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 ; Decimal # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 ; Decimal # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 ; Decimal # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
1090..1099 ; Decimal # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
17E0..17E9 ; Decimal # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Decimal # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Decimal # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Decimal # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
1A80..1A89 ; Decimal # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Decimal # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1B50..1B59 ; Decimal # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1BB0..1BB9 ; Decimal # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1C40..1C49 ; Decimal # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
1C50..1C59 ; Decimal # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
A620..A629 ; Decimal # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A8D0..A8D9 ; Decimal # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A900..A909 ; Decimal # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A9D0..A9D9 ; Decimal # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9F0..A9F9 ; Decimal # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
AA50..AA59 ; Decimal # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
112F0..112F9 ; Decimal # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
11450..11459 ; Decimal # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Decimal # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E4F0..1E4F9 ; Decimal # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
# Total code points: 680
# EOF

66
deps/zg/src/CanonData.zig vendored Normal file
View File

@@ -0,0 +1,66 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
allocator: mem.Allocator,
nfc: std.AutoHashMap([2]u21, u21),
nfd: [][]u21 = undefined,
const Self = @This();
/// Builds the canonical composition/decomposition tables by inflating the
/// deflate-compressed binary blob embedded as "canon".
/// Stream format (as read below): repeated records of
///   len:u8  — record length marker; 0 terminates the stream
///   cp:u24  — the codepoint being mapped
///   (len - 1) x u24 — cp's canonical decomposition codepoints
/// Pair decompositions (len == 3) are additionally inserted into `nfc`,
/// mapping the pair back to its primary composite.
/// Caller owns the result and must call `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
const decompressor = compress.flate.inflate.decompressor;
const in_bytes = @embedFile("canon");
var in_fbs = std.io.fixedBufferStream(in_bytes);
var in_decomp = decompressor(.raw, in_fbs.reader());
var reader = in_decomp.reader();
// NOTE(review): integers are read in native endianness — assumes the
// embedded data was generated on a machine with the same byte order;
// confirm against the data generator.
const endian = builtin.cpu.arch.endian();
var self = Self{
.allocator = allocator,
.nfc = std.AutoHashMap([2]u21, u21).init(allocator),
// One slot per possible Unicode codepoint (0x0 .. 0x10FFFF).
.nfd = try allocator.alloc([]u21, 0x110000),
};
// Count of decomposition slices allocated so far, so the errdefer
// frees exactly the slices that were actually allocated.
var slices: usize = 0;
errdefer {
self.nfc.deinit();
for (self.nfd[0..slices]) |slice| self.allocator.free(slice);
self.allocator.free(self.nfd);
}
// Default every codepoint to an empty (absent) decomposition.
@memset(self.nfd, &.{});
while (true) {
const len: u8 = try reader.readInt(u8, endian);
if (len == 0) break; // zero length marks end of stream
const cp = try reader.readInt(u24, endian);
self.nfd[cp] = try allocator.alloc(u21, len - 1);
slices += 1;
for (0..len - 1) |i| {
self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian));
}
// A two-codepoint decomposition also defines a composition pair.
if (len == 3) {
try self.nfc.put(self.nfd[cp][0..2].*, @intCast(cp));
}
}
return self;
}
/// Release the composition map, every decomposition slice, and the table.
pub fn deinit(self: *Self) void {
    self.nfc.deinit();
    for (self.nfd) |decomp| self.allocator.free(decomp);
    self.allocator.free(self.nfd);
}
/// Returns canonical decomposition for `cp`; empty when `cp` has none.
pub fn toNfd(self: Self, cp: u21) []const u21 {
    return self.nfd[cp];
}
/// Returns the primary composite for the code point pair `cps`, or null
/// when the pair does not compose.
pub fn toNfc(self: Self, cps: [2]u21) ?u21 {
    return self.nfc.get(cps);
}

202
deps/zg/src/CaseData.zig vendored Normal file
View File

@@ -0,0 +1,202 @@
//! CaseData: upper/lowercase mappings and case properties for all code
//! points, loaded from compressed data files embedded at build time.
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;
const CodePointIterator = @import("code_point").Iterator;

// Owns `case_map` and the two property stages.
allocator: mem.Allocator,
// Indexed by code point: [0] = uppercase mapping, [1] = lowercase mapping.
case_map: [][2]u21,
// Two-stage lookup table of case-property bit flags (see isCased et al.).
prop_s1: []u16 = undefined,
prop_s2: []u8 = undefined,

const Self = @This();
/// Load case mappings and case properties from the compressed `upper`,
/// `lower`, and `case_prop` data files embedded at build time.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .case_map = try allocator.alloc([2]u21, 0x110000),
    };
    errdefer allocator.free(self.case_map);
    // Default: every code point maps to itself in both cases.
    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        self.case_map[cp] = .{ cp, cp };
    }
    // Uppercase
    // Records are i24 pairs: code point (0 terminates), then the signed
    // distance to its uppercase mapping.
    const upper_bytes = @embedFile("upper");
    var upper_fbs = std.io.fixedBufferStream(upper_bytes);
    var upper_decomp = decompressor(.raw, upper_fbs.reader());
    var upper_reader = upper_decomp.reader();
    while (true) {
        const cp = try upper_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try upper_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][0] = @intCast(cp + diff);
    }
    // Lowercase (same record layout as the uppercase table).
    const lower_bytes = @embedFile("lower");
    var lower_fbs = std.io.fixedBufferStream(lower_bytes);
    var lower_decomp = decompressor(.raw, lower_fbs.reader());
    var lower_reader = lower_decomp.reader();
    while (true) {
        const cp = try lower_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try lower_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][1] = @intCast(cp + diff);
    }
    // Case properties: two-stage table — stage-1 length + u16 entries,
    // then stage-2 length + raw bytes of bit flags.
    const cp_bytes = @embedFile("case_prop");
    var cp_fbs = std.io.fixedBufferStream(cp_bytes);
    var cp_decomp = decompressor(.raw, cp_fbs.reader());
    var cp_reader = cp_decomp.reader();
    const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (0..stage_1_len) |i| self.prop_s1[i] = try cp_reader.readInt(u16, endian);
    const stage_2_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    _ = try cp_reader.readAll(self.prop_s2);
    return self;
}
/// Free the case-mapping table and both property stages.
pub fn deinit(self: *const Self) void {
    const allocator = self.allocator;
    allocator.free(self.case_map);
    allocator.free(self.prop_s1);
    allocator.free(self.prop_s2);
}
/// Returns true if `cp` is either upper, lower, or title case
/// (bit 2 of the two-stage property table).
pub fn isCased(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
/// Returns true if `cp` is uppercase (bit 1 of the property table).
pub fn isUpper(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}
/// Returns true if `str` is all uppercase. Uncased code points (digits,
/// punctuation, ...) never disqualify the string.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var it = CodePointIterator{ .bytes = str };
    while (it.next()) |cp| {
        if (self.isCased(cp.code) and !self.isUpper(cp.code)) return false;
    }
    return true;
}
// Uncased characters (digits, punctuation) must not affect the result.
test "isUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();
    try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isUpperStr("hello, world 2112!"));
    try testing.expect(!cd.isUpperStr("Hello, World 2112!"));
}
/// Returns uppercase mapping for `cp` (identity when `cp` has none).
pub fn toUpper(self: Self, cp: u21) u21 {
    return self.case_map[cp][0];
}
/// Returns a new string with all letters in uppercase.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();
    var encode_buf: [4]u8 = undefined;
    var cps = CodePointIterator{ .bytes = str };
    while (cps.next()) |cp| {
        const n = try unicode.utf8Encode(self.toUpper(cp.code), &encode_buf);
        try out.appendSlice(encode_buf[0..n]);
    }
    return try out.toOwnedSlice();
}
// Round-trip through UTF-8 encode/decode must preserve non-letters.
test "toUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();
    const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(uppered);
    try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered);
}
/// Returns true if `cp` is lowercase (bit 0 of the property table).
pub fn isLower(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}
/// Returns true if `str` is all lowercase. Uncased code points (digits,
/// punctuation, ...) never disqualify the string.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var it = CodePointIterator{ .bytes = str };
    while (it.next()) |cp| {
        if (self.isCased(cp.code) and !self.isLower(cp.code)) return false;
    }
    return true;
}
// Mirror of the isUpperStr test.
test "isLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();
    try testing.expect(cd.isLowerStr("hello, world 2112!"));
    try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isLowerStr("Hello, World 2112!"));
}
/// Returns lowercase mapping for `cp` (identity when `cp` has none).
pub fn toLower(self: Self, cp: u21) u21 {
    return self.case_map[cp][1];
}
/// Returns a new string with all letters in lowercase.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();
    var encode_buf: [4]u8 = undefined;
    var cps = CodePointIterator{ .bytes = str };
    while (cps.next()) |cp| {
        const n = try unicode.utf8Encode(self.toLower(cp.code), &encode_buf);
        try out.appendSlice(encode_buf[0..n]);
    }
    return try out.toOwnedSlice();
}
// Mirror of the toUpperStr test.
test "toLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();
    const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(lowered);
    try testing.expectEqualStrings("hello, world 2112!", lowered);
}

189
deps/zg/src/CaseFold.zig vendored Normal file
View File

@@ -0,0 +1,189 @@
//! CaseFold: case folding and caseless string matching built on top of
//! FoldData tables and a Normalize instance supplied by the caller.
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const ascii = @import("ascii");
pub const FoldData = @import("FoldData");
const Normalize = @import("Normalize");

// Borrowed fold tables; must outlive this struct.
fold_data: *const FoldData,

const Self = @This();
/// Produces the case folded code points for `cps`. Code points without a
/// fold mapping are passed through unchanged. Caller must free returned
/// slice with `allocator`.
pub fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]const u21 {
    var out = std.ArrayList(u21).init(allocator);
    defer out.deinit();
    // A single fold can expand to at most three code points.
    var fold_buf: [3]u21 = undefined;
    for (cps) |cp| {
        const folded = self.fold_data.caseFold(cp, &fold_buf);
        if (folded.len == 0) {
            try out.append(cp);
        } else {
            try out.appendSlice(folded);
        }
    }
    return try out.toOwnedSlice();
}
/// True if folding any code point in `cps` would change it.
fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    for (cps) |cp| {
        if (self.fold_data.changesWhenCaseFolded(cp)) return true;
    }
    return false;
}
/// Caseless compare `a` and `b` by decomposing to NFKD. This is the most
/// comprehensive comparison possible, but slower than `canonCaselessMatch`.
/// Each side is transformed as NFKD(CF(NFKD(CF(NFD(x))))) before comparing
/// code-point sequences for equality.
pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    // Pure-ASCII fast path: no normalization needed.
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);
    // Only allocate a folded copy when folding actually changes the text;
    // otherwise alias the NFD slice and skip the free.
    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
    const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a);
    defer allocator.free(nfkd_cf_nfd_a);
    const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
    defer allocator.free(cf_nfkd_cf_nfd_a);
    const nfkd_cf_nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
    // Process b (same pipeline as a)
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);
    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
    const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b);
    defer allocator.free(nfkd_cf_nfd_b);
    const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);
    defer allocator.free(cf_nfkd_cf_nfd_b);
    const nfkd_cf_nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
    return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
}
// U+03D3 and U+03D2+U+0301 differ canonically but match compatibly.
test "compatCaselessMatch" {
    const allocator = testing.allocator;
    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };
    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));
    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b));
    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
}
/// Performs canonical caseless string matching by decomposing to NFD. This is
/// faster than `compatCaselessMatch`, but less comprehensive.
/// Each side is transformed as NFD(CF(NFD(x))) per Unicode definition D145
/// before comparing code-point sequences for equality.
pub fn canonCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    // Pure-ASCII fast path: no normalization needed.
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);
    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
    var need_free_nfd_cf_nfd_a = false;
    var nfd_cf_nfd_a = cf_nfd_a;
    // BUG FIX: the condition was inverted (`!need_free_cf_nfd_a`), which
    // re-normalized only the untouched (already-NFD) slice and skipped
    // normalization precisely when case folding changed the text — but
    // fold output is not guaranteed to be NFD, causing false negatives.
    if (need_free_cf_nfd_a) {
        nfd_cf_nfd_a = try normalizer.nfdCodePoints(allocator, cf_nfd_a);
        need_free_nfd_cf_nfd_a = true;
    }
    defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);
    // Process b (same pipeline as a)
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);
    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
    var need_free_nfd_cf_nfd_b = false;
    var nfd_cf_nfd_b = cf_nfd_b;
    if (need_free_cf_nfd_b) {
        nfd_cf_nfd_b = try normalizer.nfdCodePoints(allocator, cf_nfd_b);
        need_free_nfd_cf_nfd_b = true;
    }
    defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);
    return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b);
}
// a~b must NOT match canonically (U+03D3 != U+03A5+U+0301); a~c must.
test "canonCaselessMatch" {
    const allocator = testing.allocator;
    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };
    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };
    try testing.expect(try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));
    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(!try caser.canonCaselessMatch(allocator, &n, a, b));
    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.canonCaselessMatch(allocator, &n, a, c));
}

49
deps/zg/src/CombiningData.zig vendored Normal file
View File

@@ -0,0 +1,49 @@
//! CombiningData: canonical combining class (ccc) lookups backed by a
//! two-stage table loaded from compressed data embedded at build time.
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

// Allocator that owns both table stages.
allocator: mem.Allocator,
// Stage 1: indexed by cp >> 8, gives base offset into stage 2.
s1: []u16 = undefined,
// Stage 2: combining-class values, indexed by stage-1 base + (cp & 0xff).
s2: []u8 = undefined,

const Self = @This();
/// Load the combining-class table from the compressed `ccc` data embedded
/// at build time. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("ccc");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    // Layout: u16 stage-1 length, stage-1 u16 entries, u16 stage-2 length,
    // then stage-2 raw bytes.
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.s2);
    _ = try reader.readAll(self.s2);
    return self;
}
/// Free both table stages.
pub fn deinit(self: *const Self) void {
    const allocator = self.allocator;
    allocator.free(self.s1);
    allocator.free(self.s2);
}
/// Returns the canonical combining class for a code point (0 for starters).
pub fn ccc(self: Self, cp: u21) u8 {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
}
/// True if `cp` is a starter code point, not a combining character.
/// Equivalent to a combining class of zero.
pub fn isStarter(self: Self, cp: u21) bool {
    return self.ccc(cp) == 0;
}

50
deps/zg/src/CompatData.zig vendored Normal file
View File

@@ -0,0 +1,50 @@
//! CompatData: compatibility decomposition (NFKD) tables, loaded from
//! compressed data embedded at build time.
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

// Allocator that owns every slice in `nfkd`.
allocator: mem.Allocator,
// Indexed by code point (0..0x10FFFF); empty slice means no decomposition.
nfkd: [][]u21 = undefined,

const Self = @This();
/// Load the compatibility decomposition table from the compressed `compat`
/// data embedded at build time. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfkd = try allocator.alloc([]u21, 0x110000),
    };
    // deinit skips zero-length slices, so it is safe to call mid-init
    // once nfkd has been memset below; any slices allocated before a
    // failure are freed by it.
    errdefer self.deinit();
    @memset(self.nfkd, &.{});
    // Record layout: u8 record length (0 terminates the stream), u24 code
    // point, then (len - 1) u24 decomposition code points.
    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfkd[cp] = try allocator.alloc(u21, len - 1);
        for (0..len - 1) |i| {
            self.nfkd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
    }
    return self;
}
/// Free all decomposition slices and the table itself.
pub fn deinit(self: *const Self) void {
    for (self.nfkd) |decomp| {
        if (decomp.len == 0) continue;
        self.allocator.free(decomp);
    }
    self.allocator.free(self.nfkd);
}
/// Returns compatibility decomposition for `cp`; empty when `cp` has none.
/// NOTE(review): returns a mutable slice into internal storage — callers
/// must not modify it. Consider `[]const u21` to match CanonData.toNfd.
pub fn toNfkd(self: Self, cp: u21) []u21 {
    return self.nfkd[cp];
}

355
deps/zg/src/DisplayWidth.zig vendored Normal file
View File

@@ -0,0 +1,355 @@
//! DisplayWidth: fixed-pitch display-cell width of strings, plus
//! width-aware centering, padding, and wrapping helpers.
const std = @import("std");
const builtin = @import("builtin");
const ArrayList = std.ArrayList;
const mem = std.mem;
const simd = std.simd;
const testing = std.testing;
const ascii = @import("ascii");
const CodePointIterator = @import("code_point").Iterator;
const GraphemeIterator = @import("grapheme").Iterator;
pub const DisplayWidthData = @import("DisplayWidthData");

// Borrowed width/grapheme tables; must outlive this struct.
data: *const DisplayWidthData,

const Self = @This();
/// strWidth returns the total display width of `str` as the number of cells
/// required in a fixed-pitch font (i.e. a terminal screen).
/// Widths may be negative per code point (e.g. backspace/DEL, see tests),
/// so totals accumulate in `isize` and clamp to zero at the end.
pub fn strWidth(self: Self, str: []const u8) usize {
    var total: isize = 0;
    // ASCII fast path
    if (ascii.isAsciiOnly(str)) {
        for (str) |b| total += self.data.codePointWidth(b);
        return @intCast(@max(0, total));
    }
    // Non-ASCII: width is computed per grapheme cluster.
    var giter = GraphemeIterator.init(str, &self.data.g_data);
    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
        var gc_total: isize = 0;
        while (cp_iter.next()) |cp| {
            var w = self.data.codePointWidth(cp.code);
            if (w != 0) {
                // Handle text emoji sequence.
                // Note: this advances cp_iter, consuming the following
                // code point while checking for a variation selector.
                if (cp_iter.next()) |ncp| {
                    // emoji text sequence.
                    if (ncp.code == 0xFE0E) w = 1; // VS15: text presentation
                    if (ncp.code == 0xFE0F) w = 2; // VS16: emoji presentation
                }
                // Only adding width of first non-zero-width code point.
                if (gc_total == 0) {
                    gc_total = w;
                    break;
                }
            }
        }
        total += gc_total;
    }
    return @intCast(@max(0, total));
}
// Covers ASCII, combining marks, emoji/ZWJ sequences, variation
// selectors, negative-width controls, and CJK wide characters.
test "strWidth" {
    const data = try DisplayWidthData.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };
    try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector
    try testing.expectEqual(@as(usize, 0), self.strWidth("A\x08")); // Backspace
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA")); // DEL
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA\x08\x08")); // never less than o
    // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
    const empty = "";
    try testing.expectEqual(@as(usize, 0), self.strWidth(empty));
    const with_null = "hello\x00world";
    try testing.expectEqual(@as(usize, 10), self.strWidth(with_null));
    const hello_jp = "コンニチハ, セカイ!";
    try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp));
    const control = "\x1b[0m";
    try testing.expectEqual(@as(usize, 3), self.strWidth(control));
    const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
    try testing.expectEqual(@as(usize, 3), self.strWidth(balinese));
    // These commented out tests require a new specification for complex scripts.
    // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    // const jamo = "\u{1100}\u{1160}";
    // try testing.expectEqual(@as(usize, 3), strWidth(jamo));
    // const devengari = "\u{0915}\u{094D}\u{0937}\u{093F}";
    // try testing.expectEqual(@as(usize, 3), strWidth(devengari));
    // const tamal = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}";
    // try testing.expectEqual(@as(usize, 5), strWidth(tamal));
    // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}";
    // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
    // The following passes but as a mere coincidence.
    const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
    try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2));
    // From Rust https://github.com/jameslanska/unicode-display-width
    try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("🦀"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👨‍👩‍👧‍👧"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👩‍🔬"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("sane text"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
    try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}"));
}
/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
/// If the length of `str` and `total_width` have different parity, the right side of `str` will
/// receive one additional pad. This makes sure the returned string fills the requested width.
/// Caller must free returned bytes with `allocator`.
/// Errors: `error.StrTooLong` when `str` is wider than `total_width`;
/// `error.PadTooLong` when `pad` is empty or too wide for the margin.
pub fn center(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);
    const pad_width = self.strWidth(pad);
    // BUG FIX: a zero-width pad previously reached the @divFloor below
    // and panicked with division by zero; reject it explicitly.
    if (pad_width == 0) return error.PadTooLong;
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
    const margin_width = @divFloor((total_width - str_width), 2);
    if (pad_width > margin_width) return error.PadTooLong;
    // Parity mismatch: the right margin receives one extra pad.
    const extra_pad: usize = if (total_width % 2 != str_width % 2) 1 else 0;
    const pads = @divFloor(margin_width, pad_width) * 2 + extra_pad;
    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;
    // Left margin.
    while (pads_index < pads / 2) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }
    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;
    // Right margin (possibly one pad wider, see extra_pad).
    pads_index = 0;
    while (pads_index < pads / 2 + extra_pad) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }
    return result;
}
// Exercises all parity combinations of input width vs. total width.
test "center" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };
    // Input and width both have odd length
    var centered = try self.center(allocator, "abc", 9, "*");
    try testing.expectEqualSlices(u8, "***abc***", centered);
    // Input and width both have even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "---w😊w---", centered);
    // Input has even length, width has odd length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "1234", 9, "-");
    try testing.expectEqualSlices(u8, "--1234---", centered);
    // Input has odd length, width has even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 8, "-");
    try testing.expectEqualSlices(u8, "--123---", centered);
    // Input is the same length as the width
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 3, "-");
    try testing.expectEqualSlices(u8, "123", centered);
    // Input is empty
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 3, "-");
    try testing.expectEqualSlices(u8, "---", centered);
    // Input is empty and width is zero
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 0, "-");
    try testing.expectEqualSlices(u8, "", centered);
    // Input is longer than the width, which is an error
    testing.allocator.free(centered);
    try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-"));
}
/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the left side. Caller must free returned bytes with `allocator`.
/// Errors: `error.StrTooLong` when `str` is wider than `total_width`;
/// `error.PadTooLong` when `pad` is empty or too wide for the margin.
pub fn padLeft(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // BUG FIX: an exact-fit input previously fell through to the
    // `str_width + pad_width > total_width` check and returned
    // error.PadTooLong; return a copy instead, consistent with `center`.
    if (str_width == total_width) return try allocator.dupe(u8, str);
    const pad_width = self.strWidth(pad);
    // BUG FIX: a zero-width pad previously reached the @divFloor below
    // and panicked with division by zero; reject it explicitly.
    if (pad_width == 0) return error.PadTooLong;
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;
    const pads = @divFloor(margin_width, pad_width);
    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;
    while (pads_index < pads) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }
    @memcpy(result[bytes_index..][0..str.len], str);
    return result;
}
// Width is measured in display cells, so the emoji counts as two.
test "padLeft" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };
    var right_aligned = try self.padLeft(allocator, "abc", 9, "*");
    defer testing.allocator.free(right_aligned);
    try testing.expectEqualSlices(u8, "******abc", right_aligned);
    testing.allocator.free(right_aligned);
    right_aligned = try self.padLeft(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
}
/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the right side. Caller must free returned bytes with `allocator`.
/// Errors: `error.StrTooLong` when `str` is wider than `total_width`;
/// `error.PadTooLong` when `pad` is empty or too wide for the margin.
pub fn padRight(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // BUG FIX: an exact-fit input previously fell through to the
    // `str_width + pad_width > total_width` check and returned
    // error.PadTooLong; return a copy instead, consistent with `center`.
    if (str_width == total_width) return try allocator.dupe(u8, str);
    const pad_width = self.strWidth(pad);
    // BUG FIX: a zero-width pad previously reached the @divFloor below
    // and panicked with division by zero; reject it explicitly.
    if (pad_width == 0) return error.PadTooLong;
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;
    const pads = @divFloor(margin_width, pad_width);
    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;
    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;
    while (pads_index < pads) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }
    return result;
}
// Mirror of the padLeft test.
test "padRight" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };
    var left_aligned = try self.padRight(allocator, "abc", 9, "*");
    defer testing.allocator.free(left_aligned);
    try testing.expectEqualSlices(u8, "abc******", left_aligned);
    testing.allocator.free(left_aligned);
    left_aligned = try self.padRight(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
}
/// Wraps a string approximately at the given number of colums per line.
/// `threshold` defines how far the last column of the last word can be
/// from the edge. Caller must free returned bytes with `allocator`.
pub fn wrap(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();
    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;
    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');
        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += self.strWidth(word) + 1;
            // `or` short-circuits, so the subtraction cannot underflow.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }
    // Trim the separator bytes appended after the final word.
    // BUG FIX: the original popped exactly two bytes unconditionally,
    // which (a) panicked on empty input and (b) deleted the last letter
    // of the final word whenever no trailing newline had been appended
    // (the buffer then ended in "word " rather than "word \n").
    while (result.items.len > 0) {
        const last = result.items[result.items.len - 1];
        if (last != ' ' and last != '\n') break;
        _ = result.pop();
    }
    return try result.toOwnedSlice();
}
// Interior lines keep their trailing space before the newline.
test "wrap" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };
    const input = "The quick brown fox\r\njumped over the lazy dog!";
    const got = try self.wrap(allocator, input, 10, 3);
    defer testing.allocator.free(got);
    const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";
    try testing.expectEqualStrings(want, got);
}

98
deps/zg/src/FoldData.zig vendored Normal file
View File

@@ -0,0 +1,98 @@
//! FoldData: case-folding tables (three stages plus a changes-when-case-
//! folded exception list), loaded from compressed data embedded at build time.
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

// Allocator that owns the stage and exception slices.
allocator: mem.Allocator,
// First code point with no fold data; anything >= cutoff folds to itself.
cutoff: u21 = undefined,
// Range and contents of the changes-when-case-folded exception list.
cwcf_exceptions_min: u21 = undefined,
cwcf_exceptions_max: u21 = undefined,
cwcf_exceptions: []u21 = undefined,
// Base index in stage3 where multi-code-point mappings begin.
multiple_start: u21 = undefined,
stage1: []u8 = undefined,
stage2: []u8 = undefined,
// Signed deltas, or zero-terminated multi-code-point mapping entries.
stage3: []i24 = undefined,

const Self = @This();
/// Load the case-folding tables from the compressed `fold` data embedded
/// at build time. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("fold");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    self.cutoff = @intCast(try reader.readInt(u24, endian));
    self.multiple_start = @intCast(try reader.readInt(u24, endian));
    // Each table is a u16 length followed by its entries.
    var len = try reader.readInt(u16, endian);
    self.stage1 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage1);
    for (0..len) |i| self.stage1[i] = try reader.readInt(u8, endian);
    len = try reader.readInt(u16, endian);
    self.stage2 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage2);
    for (0..len) |i| self.stage2[i] = try reader.readInt(u8, endian);
    len = try reader.readInt(u16, endian);
    self.stage3 = try allocator.alloc(i24, len);
    errdefer allocator.free(self.stage3);
    for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian);
    self.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian));
    self.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian));
    len = try reader.readInt(u16, endian);
    self.cwcf_exceptions = try allocator.alloc(u21, len);
    // BUG FIX: this errdefer was missing, leaking the exception list if a
    // read in the loop below fails.
    errdefer allocator.free(self.cwcf_exceptions);
    for (0..len) |i| self.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian));
    return self;
}
/// Free all table memory allocated by `init`.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.stage1);
    self.allocator.free(self.stage2);
    self.allocator.free(self.stage3);
    // BUG FIX: `cwcf_exceptions` is allocated in `init` but was never
    // freed here, leaking on every deinit.
    self.allocator.free(self.cwcf_exceptions);
}
/// Returns the case fold for `cp`, written into `buf` (must hold at least
/// 3 code points). Returns an empty slice when `cp` folds to itself.
pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 {
    // Code points at or above the cutoff have no fold data.
    if (cp >= self.cutoff) return &.{};
    const stage1_val = self.stage1[cp >> 8];
    // A zero stage-1 entry marks a block with no mappings.
    if (stage1_val == 0) return &.{};
    const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF);
    const stage3_index = self.stage2[stage2_index];
    // High bit set: multi-code-point mapping. The low 7 bits index a
    // group of 3 zero-terminated i24 entries starting at multiple_start.
    if (stage3_index & 0x80 != 0) {
        const real_index = @as(usize, self.multiple_start) + (stage3_index ^ 0x80) * 3;
        const mapping = mem.sliceTo(self.stage3[real_index..][0..3], 0);
        for (mapping, 0..) |c, i| buf[i] = @intCast(c);
        return buf[0..mapping.len];
    }
    // Otherwise stage 3 holds a signed delta from cp to its fold.
    const offset = self.stage3[stage3_index];
    if (offset == 0) return &.{};
    buf[0] = @intCast(@as(i32, cp) + offset);
    return buf[0..1];
}
/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`), i.e. `cp` has a
/// fold mapping and is not on the precomputed exception list.
pub fn changesWhenCaseFolded(self: Self, cp: u21) bool {
    var buf: [3]u21 = undefined;
    const has_mapping = self.caseFold(cp, &buf).len != 0;
    return has_mapping and !self.isCwcfException(cp);
}
/// True if `cp` is on the changes-when-case-folded exception list.
/// The min/max range check makes the common miss cheap.
fn isCwcfException(self: Self, cp: u21) bool {
    if (cp < self.cwcf_exceptions_min or cp > self.cwcf_exceptions_max) return false;
    return std.mem.indexOfScalar(u21, self.cwcf_exceptions, cp) != null;
}

171
deps/zg/src/GenCatData.zig vendored Normal file
View File

@@ -0,0 +1,171 @@
//! GenCatData: Unicode General Category lookups backed by a three-stage
//! table loaded from compressed data embedded at build time.
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

/// General Category
pub const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};

// Allocator that owns the three table stages.
allocator: mem.Allocator,
// Stage 1: indexed by cp >> 8, gives base offset into stage 2.
s1: []u16 = undefined,
// Stage 2: indexes into the stage-3 palette of category values.
s2: []u5 = undefined,
// Stage 3: palette of Gc enum values (u5 suffices for 30 categories).
s3: []u5 = undefined,

const Self = @This();
/// Load the general-category table from the compressed `gencat` data
/// embedded at build time. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gencat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u5, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
    // Stage-3 length is stored as a single byte: the palette holds at
    // most 30 category values, so it always fits.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u5, s3_len);
    errdefer allocator.free(self.s3);
    for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian));
    return self;
}
pub fn deinit(self: *const Self) void {
self.allocator.free(self.s1);
self.allocator.free(self.s2);
self.allocator.free(self.s3);
}
/// Lookup the General Category for `cp` via the three-stage table.
pub fn gc(self: Self, cp: u21) Gc {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
}
/// True if `cp` has a C (Other) general category.
pub fn isControl(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Cc,
        .Cf,
        .Cn,
        .Co,
        .Cs,
        => true,
        else => false,
    };
}
/// True if `cp` has an L (Letter) general category.
pub fn isLetter(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Ll,
        .Lm,
        .Lo,
        .Lu,
        .Lt,
        => true,
        else => false,
    };
}
/// True if `cp` has an M (Mark) general category.
pub fn isMark(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Mc,
        .Me,
        .Mn,
        => true,
        else => false,
    };
}
/// True if `cp` has an N (Number) general category.
pub fn isNumber(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Nd,
        .Nl,
        .No,
        => true,
        else => false,
    };
}
/// True if `cp` has a P (Punctuation) general category.
pub fn isPunctuation(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Pc,
        .Pd,
        .Pe,
        .Pf,
        .Pi,
        .Po,
        .Ps,
        => true,
        else => false,
    };
}
/// True if `cp` has an S (Symbol) general category.
pub fn isSymbol(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Sc,
        .Sk,
        .Sm,
        .So,
        => true,
        else => false,
    };
}
/// True if `cp` has a Z (Separator) general category.
pub fn isSeparator(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Zl,
        .Zp,
        .Zs,
        => true,
        else => false,
    };
}

88
deps/zg/src/GraphemeData.zig vendored Normal file
View File

@@ -0,0 +1,88 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
/// Indic syllable type (presumably the UCD InCB property used by the
/// grapheme break rules — confirm against the table generator).
pub const Indic = enum {
    none,
    Consonant,
    Extend,
    Linker,
};
/// Grapheme cluster break property (UAX #29 property values).
pub const Gbp = enum {
    none,
    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};
allocator: mem.Allocator,
// Three-stage table. Each s3 byte packs three properties:
//   bits 4..7 = Gbp, bits 1..3 = Indic, bit 0 = emoji flag
// (see `gbp`, `indic`, and `isEmoji` below).
s1: []u16 = undefined,
s2: []u16 = undefined,
s3: []u8 = undefined,
const Self = @This();
/// Decompresses the embedded `gbp` table and loads the three lookup
/// stages. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    // NOTE(review): native-endian reads; assumes the generator matches — confirm.
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u16, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian);
    const s3_len: u16 = try reader.readInt(u16, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    // s3 is raw bytes, so it can be read in one shot.
    _ = try reader.readAll(self.s3);
    return self;
}
/// Frees the three lookup stages.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}
/// Lookup the grapheme break property for a code point
/// (high nibble of the packed s3 byte).
pub fn gbp(self: Self, cp: u21) Gbp {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
}
/// Lookup the Indic syllable type for a code point (bits 1..3 of s3).
pub fn indic(self: Self, cp: u21) Indic {
    return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
}
/// True if the code point has the emoji flag set (bit 0 of s3).
pub fn isEmoji(self: Self, cp: u21) bool {
    return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
}

53
deps/zg/src/HangulData.zig vendored Normal file
View File

@@ -0,0 +1,53 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
/// Hangul syllable type; `none` for code points with no Hangul
/// syllable classification.
pub const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};
allocator: mem.Allocator,
// Two-stage table: s1 is indexed by the high byte of a code point and
// s2 holds the Syllable value directly (u3 fits the six variants).
s1: []u16 = undefined,
s2: []u3 = undefined,
const Self = @This();
/// Decompresses the embedded `hangul` table. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("hangul");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    // NOTE(review): native-endian reads; assumes the generator matches — confirm.
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u3, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
    return self;
}
/// Frees both lookup stages.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}
/// Returns the Hangul syllable type for `cp`.
pub fn syllable(self: Self, cp: u21) Syllable {
    return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]);
}

37
deps/zg/src/NormData.zig vendored Normal file
View File

@@ -0,0 +1,37 @@
const std = @import("std");
const mem = std.mem;
const CanonData = @import("CanonData");
const CccData = @import("CombiningData");
const CompatData = @import("CompatData");
const FoldData = @import("FoldData");
const HangulData = @import("HangulData");
const NormPropsData = @import("NormPropsData");
// Aggregates every Unicode data table the normalizer needs.
canon_data: CanonData = undefined,
ccc_data: CccData = undefined,
compat_data: CompatData = undefined,
hangul_data: HangulData = undefined,
normp_data: NormPropsData = undefined,
const Self = @This();
/// Initializes all component tables in place. On a failure partway
/// through, the errdefer chain frees the tables already loaded, so no
/// memory is leaked.
pub fn init(self: *Self, allocator: std.mem.Allocator) !void {
    self.canon_data = try CanonData.init(allocator);
    errdefer self.canon_data.deinit();
    self.ccc_data = try CccData.init(allocator);
    errdefer self.ccc_data.deinit();
    self.compat_data = try CompatData.init(allocator);
    errdefer self.compat_data.deinit();
    self.hangul_data = try HangulData.init(allocator);
    errdefer self.hangul_data.deinit();
    // Last fallible step: no errdefer needed after it.
    self.normp_data = try NormPropsData.init(allocator);
}
/// Frees all component tables.
pub fn deinit(self: *Self) void {
    self.canon_data.deinit();
    self.ccc_data.deinit();
    self.compat_data.deinit();
    self.hangul_data.deinit();
    self.normp_data.deinit();
}

54
deps/zg/src/NormPropsData.zig vendored Normal file
View File

@@ -0,0 +1,54 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
allocator: mem.Allocator,
// Two-stage table; each s2 entry is a small bit set:
//   bit 0 = changes under NFD, bit 1 = changes under NFKD, bit 2 = FCX.
// Note isNfd/isNfkd test for the bit being CLEAR.
s1: []u16 = undefined,
s2: []u4 = undefined,
const Self = @This();
/// Decompresses the embedded `normp` table. Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("normp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    // NOTE(review): native-endian reads; assumes the generator matches — confirm.
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u4, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
    return self;
}
/// Frees both lookup stages.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}
/// Returns true if `cp` is already in NFD form (bit 0 clear).
pub fn isNfd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0;
}
/// Returns true if `cp` is already in NFKD form (bit 1 clear).
pub fn isNfkd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0;
}
/// Returns true if `cp` is not allowed in any normalized form
/// (composition exclusion; bit 2 set).
pub fn isFcx(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

622
deps/zg/src/Normalize.zig vendored Normal file
View File

@@ -0,0 +1,622 @@
//! Normalizer contains functions and methods that implement
//! Unicode Normalization. You can normalize strings into NFC,
//! NFKC, NFD, and NFKD normalization forms.
const std = @import("std");
const debug = std.debug;
const assert = debug.assert;
const fmt = std.fmt;
const heap = std.heap;
const mem = std.mem;
const simd = std.simd;
const testing = std.testing;
const unicode = std.unicode;
const ascii = @import("ascii");
const CodePointIterator = @import("code_point").Iterator;
pub const NormData = @import("NormData");
// The aggregated Unicode tables this normalizer reads from.
norm_data: *const NormData,
const Self = @This();
// Hangul syllable arithmetic constants (Unicode Standard ch. 3.12,
// "Conjoining Jamo Behavior").
const SBase: u21 = 0xAC00;
const LBase: u21 = 0x1100;
const VBase: u21 = 0x1161;
const TBase: u21 = 0x11A7;
const LCount: u21 = 19;
const VCount: u21 = 21;
const TCount: u21 = 28;
const NCount: u21 = 588; // VCount * TCount
const SCount: u21 = 11172; // LCount * NCount
/// Algorithmically decomposes a precomposed Hangul syllable into two
/// (LV) or three (LVT) jamo, written into `buf`. Returns null when `cp`
/// is not a precomposed syllable.
fn decomposeHangul(self: Self, cp: u21, buf: []u21) ?Decomp {
    const kind = self.norm_data.hangul_data.syllable(cp);
    if (kind != .LV and kind != .LVT) return null;
    const SIndex: u21 = cp - SBase;
    const LIndex: u21 = SIndex / NCount;
    const VIndex: u21 = (SIndex % NCount) / TCount;
    const TIndex: u21 = SIndex % TCount;
    const LPart: u21 = LBase + LIndex;
    const VPart: u21 = VBase + VIndex;
    var dc = Decomp{ .form = .nfd };
    buf[0] = LPart;
    buf[1] = VPart;
    if (TIndex == 0) {
        // LV syllable: no trailing consonant part.
        dc.cps = buf[0..2];
        return dc;
    }
    // TPart
    buf[2] = TBase + TIndex;
    dc.cps = buf[0..3];
    return dc;
}
/// Composes an LV syllable with trailing consonant `t` into an LVT syllable.
fn composeHangulCanon(lv: u21, t: u21) u21 {
    assert(0x11A8 <= t and t <= 0x11C2);
    return lv + (t - TBase);
}
/// Composes leading jamo `l`, vowel `v`, and optional trailing `t`
/// (0 = none) into a precomposed syllable.
fn composeHangulFull(l: u21, v: u21, t: u21) u21 {
    assert(0x1100 <= l and l <= 0x1112);
    assert(0x1161 <= v and v <= 0x1175);
    const LIndex = l - LBase;
    const VIndex = v - VBase;
    const LVIndex = LIndex * NCount + VIndex * TCount;
    if (t == 0) return SBase + LVIndex;
    assert(0x11A8 <= t and t <= 0x11C2);
    const TIndex = t - TBase;
    return SBase + LVIndex + TIndex;
}
/// Normalization form; `.same` marks "no decomposition applies".
const Form = enum {
    nfc,
    nfd,
    nfkc,
    nfkd,
    same,
};
/// A decomposition result: the form it satisfies plus its code points.
const Decomp = struct {
    form: Form = .same,
    cps: []const u21 = &.{},
};
// `mapping` retrieves the decomposition mapping for a code point as per the UCD.
// For .nfkd the compatibility mapping is preferred, with the canonical
// mapping as a fallback; `.same` (empty cps) means no mapping exists.
fn mapping(self: Self, cp: u21, form: Form) Decomp {
    var dc = Decomp{};
    switch (form) {
        .nfd => {
            dc.cps = self.norm_data.canon_data.toNfd(cp);
            if (dc.cps.len != 0) dc.form = .nfd;
        },
        .nfkd => {
            dc.cps = self.norm_data.compat_data.toNfkd(cp);
            if (dc.cps.len != 0) {
                dc.form = .nfkd;
            } else {
                // No compatibility mapping; fall back to the canonical one.
                dc.cps = self.norm_data.canon_data.toNfd(cp);
                if (dc.cps.len != 0) dc.form = .nfkd;
            }
        },
        else => @panic("Normalizer.mapping only accepts form .nfd or .nfkd."),
    }
    return dc;
}
// `decompose` a code point to the specified normalization form, which should
// be either `.nfd` or `.nfkd`. `buf` receives the fully recursive
// decomposition and must hold at least 18 code points (see `work` below).
fn decompose(
    self: Self,
    cp: u21,
    form: Form,
    buf: []u21,
) Decomp {
    // ASCII
    if (cp < 128) return .{};
    // NFD / NFKD quick checks.
    switch (form) {
        .nfd => if (self.norm_data.normp_data.isNfd(cp)) return .{},
        .nfkd => if (self.norm_data.normp_data.isNfkd(cp)) return .{},
        else => @panic("Normalizer.decompose only accepts form .nfd or .nfkd."),
    }
    // Hangul precomposed syllable full decomposition.
    if (self.decomposeHangul(cp, buf)) |dc| return dc;
    // Full decomposition.
    var dc = Decomp{ .form = form };
    var result_index: usize = 0;
    var work_index: usize = 1;
    // Start work with argument code point. The work stack holds 18 entries;
    // NOTE(review): 18 is presumed to be the maximum recursive expansion of
    // a single code point — confirm against the table generator.
    var work = [_]u21{cp} ++ [_]u21{0} ** 17;
    while (work_index > 0) {
        // Look at previous code point in work queue.
        work_index -= 1;
        const next = work[work_index];
        const m = self.mapping(next, form);
        // No more of decompositions for this code point.
        if (m.form == .same) {
            buf[result_index] = next;
            result_index += 1;
            continue;
        }
        // Work backwards through decomposition so the first mapped code
        // point ends up on top of the stack and is processed next.
        // `i` starts at 1 because m_last is 1 past the last code point.
        var i: usize = 1;
        while (i <= m.cps.len) : ({
            i += 1;
            work_index += 1;
        }) {
            work[work_index] = m.cps[m.cps.len - i];
        }
    }
    dc.cps = buf[0..result_index];
    return dc;
}
// Spot-checks NFD and NFKD decomposition: canonical mappings, Hangul
// algorithmic decomposition, and compatibility-only mappings.
test "decompose" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    var n = Self{ .norm_data = &data };
    var buf: [18]u21 = undefined;
    var dc = n.decompose('é', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]);
    dc = n.decompose('\u{1e0a}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
    dc = n.decompose('\u{1e0a}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
    dc = n.decompose('\u{3189}', .nfd, &buf);
    try testing.expect(dc.form == .same);
    try testing.expect(dc.cps.len == 0);
    dc = n.decompose('\u{3189}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]);
    dc = n.decompose('\u{ace1}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
    dc = n.decompose('\u{ace1}', .nfkd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
    dc = n.decompose('\u{3d3}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]);
    dc = n.decompose('\u{3d3}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]);
}
/// Returned from various functions in this namespace. When `allocator`
/// is null the slice borrows the caller's input and `deinit` is a no-op.
/// Remember to call `deinit` to free any allocated memory.
pub const Result = struct {
    allocator: ?mem.Allocator = null,
    slice: []const u8,
    pub fn deinit(self: *const Result) void {
        if (self.allocator) |allocator| allocator.free(self.slice);
    }
};
// Compares code points by Canonical Combining Class order.
fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
    return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs);
}
// Applies the Canonical Sorting Algorithm: every maximal run of code
// points with non-zero CCC is sorted by CCC; starters stay in place.
fn canonicalSort(self: Self, cps: []u21) void {
    var i: usize = 0;
    while (i < cps.len) : (i += 1) {
        const start: usize = i;
        while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
        mem.sort(u21, cps[start..i], self, cccLess);
    }
}
/// Normalize `str` to NFD.
pub fn nfd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfd);
}
/// Normalize `str` to NFKD.
pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfkd);
}
/// Decomposes `str` to `form` (.nfd or .nfkd) and returns the
/// canonically ordered code points. Caller owns the returned slice.
pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();
    var cp_iter = CodePointIterator{ .bytes = str };
    var dc_buf: [18]u21 = undefined; // Sized to match decompose's work buffer.
    while (cp_iter.next()) |cp| {
        const dc = self.decompose(cp.code, form, &dc_buf);
        if (dc.form == .same) {
            try dcp_list.append(cp.code);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }
    self.canonicalSort(dcp_list.items);
    return try dcp_list.toOwnedSlice();
}
// Decomposes `str` to `form` and UTF-8 encodes the result. ASCII input
// is returned as a borrowed (non-allocated) slice, so it must outlive
// the Result.
fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
    const dcps = try self.nfxdCodePoints(allocator, str, form);
    defer allocator.free(dcps);
    var dstr_list = std.ArrayList(u8).init(allocator);
    defer dstr_list.deinit();
    var buf: [4]u8 = undefined;
    for (dcps) |dcp| {
        // dcps holds valid code points, so encoding cannot fail.
        const len = unicode.utf8Encode(dcp, &buf) catch unreachable;
        try dstr_list.appendSlice(buf[0..len]);
    }
    return Result{ .allocator = allocator, .slice = try dstr_list.toOwnedSlice() };
}
// ASCII inputs exercise the borrowed-slice fast path (no allocation);
// non-ASCII inputs exercise full decomposition and canonical ordering.
test "nfd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfd(allocator, "Hello World!");
    defer result.deinit();
    try testing.expectEqualStrings("Hello World!", result.slice);
}
test "nfd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();
    try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
}
test "nfkd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfkd(allocator, "Hello World!");
    defer result.deinit();
    try testing.expectEqualStrings("Hello World!", result.slice);
}
test "nfkd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();
    try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
}
/// Decomposes a code point slice to NFD and returns the canonically
/// ordered result. Caller owns the returned slice.
pub fn nfdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();
    var dc_buf: [18]u21 = undefined; // Sized to match decompose's work buffer.
    for (cps) |cp| {
        const dc = self.decompose(cp, .nfd, &dc_buf);
        if (dc.form == .same) {
            try dcp_list.append(cp);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }
    self.canonicalSort(dcp_list.items);
    return try dcp_list.toOwnedSlice();
}
/// Decomposes a code point slice to NFKD and returns the canonically
/// ordered result. Caller owns the returned slice.
pub fn nfkdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();
    var dc_buf: [18]u21 = undefined; // Sized to match decompose's work buffer.
    for (cps) |cp| {
        const dc = self.decompose(cp, .nfkd, &dc_buf);
        if (dc.form == .same) {
            try dcp_list.append(cp);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }
    self.canonicalSort(dcp_list.items);
    return try dcp_list.toOwnedSlice();
}
// Composition (NFC, NFKC)
// True if `cp` has a Hangul syllable type. The cheap range check skips
// the table lookup for everything below the first jamo (U+1100).
fn isHangul(self: Self, cp: u21) bool {
    return cp >= 0x1100 and self.norm_data.hangul_data.syllable(cp) != .none;
}
/// Normalizes `str` to NFC.
pub fn nfc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfc);
}
/// Normalizes `str` to NFKC.
pub fn nfkc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfkc);
}
/// Normalizes `str` to the composed form `form` (.nfc or .nfkc):
/// decompose first, then apply the canonical composition algorithm.
/// ASCII-only (and, for NFC, Latin-1-only) input is returned as a
/// borrowed slice that must outlive the Result.
fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
    if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };
    // Decompose first.
    var dcps = if (form == .nfc)
        try self.nfxdCodePoints(allocator, str, .nfd)
    else
        try self.nfxdCodePoints(allocator, str, .nfkd);
    defer allocator.free(dcps);
    // Compose. Deleted code points are overwritten with `tombstone`.
    // 0x110000 is one past the last valid code point (U+10FFFF) but still
    // fits in u21, so it can never collide with decomposed input. (The
    // previous sentinel, 0xE000, is a real private-use code point that
    // would have been silently dropped from the output if present in `str`.)
    const tombstone = 0x110000;
    // Loop over all decomposed code points.
    while (true) {
        var i: usize = 1; // start at second code point.
        var deleted: usize = 0;
        // For each code point, C, find the preceding
        // starter code point L, if any.
        block_check: while (i < dcps.len) : (i += 1) {
            const C = dcps[i];
            if (C == tombstone) continue :block_check;
            const cc_C = self.norm_data.ccc_data.ccc(C);
            var starter_index: ?usize = null;
            var j: usize = i;
            // Seek back to find starter L, if any.
            while (true) {
                j -= 1;
                if (dcps[j] == tombstone) continue;
                // Check for starter.
                if (self.norm_data.ccc_data.isStarter(dcps[j])) {
                    // Check for blocking conditions.
                    for (dcps[(j + 1)..i]) |B| {
                        if (B == tombstone) continue;
                        const cc_B = self.norm_data.ccc_data.ccc(B);
                        if (cc_B != 0 and self.isHangul(C)) continue :block_check;
                        if (cc_B >= cc_C) continue :block_check;
                    }
                    // Found starter at j.
                    starter_index = j;
                    break;
                }
                if (j == 0) break;
            }
            // If we have a starter L, see if there's a primary
            // composite, P, for the sequence L, C. If so, we must
            // replace L with P and delete C.
            if (starter_index) |sidx| {
                const L = dcps[sidx];
                var processed_hangul = false;
                // If L and C are Hangul syllables, we can compose
                // them algorithmically if possible.
                if (self.isHangul(L) and self.isHangul(C)) {
                    // Get Hangul syllable types.
                    const l_stype = self.norm_data.hangul_data.syllable(L);
                    const c_stype = self.norm_data.hangul_data.syllable(C);
                    if (l_stype == .LV and c_stype == .T) {
                        // LV, T canonical composition.
                        dcps[sidx] = composeHangulCanon(L, C);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }
                    if (l_stype == .L and c_stype == .V) {
                        // L, V full composition. L, V, T is handled via main loop.
                        dcps[sidx] = composeHangulFull(L, C, 0);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }
                    if (processed_hangul) deleted += 1;
                }
                // If no composition has occurred yet.
                if (!processed_hangul) {
                    // L, C are not Hangul, so check for primary composite
                    // in the Unicode Character Database.
                    if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| {
                        // We have a primary composite P for L, C.
                        // We must check if P is not in the Full
                        // Composition Exclusions (FCX) list,
                        // preventing it from appearing in any
                        // composed form (NFC, NFKC).
                        if (!self.norm_data.normp_data.isFcx(P)) {
                            dcps[sidx] = P;
                            dcps[i] = tombstone; // Mark for deletion.
                            deleted += 1;
                        }
                    }
                }
            }
        }
        // If we have no deletions, the code point sequence
        // has been fully composed.
        if (deleted == 0) {
            var cstr_list = std.ArrayList(u8).init(allocator);
            defer cstr_list.deinit();
            var buf: [4]u8 = undefined;
            for (dcps) |cp| {
                if (cp == tombstone) continue; // "Delete"
                const len = unicode.utf8Encode(cp, &buf) catch unreachable;
                try cstr_list.appendSlice(buf[0..len]);
            }
            return Result{ .allocator = allocator, .slice = try cstr_list.toOwnedSlice() };
        }
    }
}
// Round-trips a decomposed sequence back to its primary composite.
test "nfc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
    defer result.deinit();
    try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice);
}
test "nfkc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
    defer result.deinit();
    try testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
}
/// Reports whether `a` and `b` are canonically equivalent, i.e. equal
/// after both are normalized to NFC using `allocator`.
pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool {
    const nfc_a = try self.nfc(allocator, a);
    defer nfc_a.deinit();
    const nfc_b = try self.nfc(allocator, b);
    defer nfc_b.deinit();
    return mem.eql(u8, nfc_a.slice, nfc_b.slice);
}
// Precomposed and decomposed spellings must compare equal.
test "eql" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };
    try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
    try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
}
/// Returns true if `str` only contains code points in the Latin-1 range
/// (U+0000–U+00FF). Uses SIMD if possible.
pub fn isLatin1Only(str: []const u8) bool {
    var cp_iter = CodePointIterator{ .bytes = str };
    const vec_len = simd.suggestVectorLength(u21) orelse return blk: {
        // No SIMD available: plain scalar scan.
        break :blk while (cp_iter.next()) |cp| {
            if (cp.code > 255) break false;
        } else true;
    };
    const Vec = @Vector(vec_len, u21);
    outer: while (true) {
        var v1: Vec = undefined;
        const saved_cp_i = cp_iter.i;
        // Gather one full vector of code points; when input runs short,
        // rewind and finish with the scalar tail loop below.
        for (0..vec_len) |i| {
            if (cp_iter.next()) |cp| {
                v1[i] = cp.code;
            } else {
                cp_iter.i = saved_cp_i;
                break :outer;
            }
        }
        // Latin-1 ends at U+00FF, so anything above 255 disqualifies.
        // (The previous bound of 256 was off by one and wrongly accepted U+0100.)
        const v2: Vec = @splat(255);
        if (@reduce(.Or, v1 > v2)) return false;
    }
    return while (cp_iter.next()) |cp| {
        if (cp.code > 255) break false;
    } else true;
}
// U+00FE/U+00FF sit at the top of the Latin-1 range; U+03D3 is well outside it.
test "isLatin1Only" {
    const latin1_only = "Hello, World! \u{fe} \u{ff}";
    try testing.expect(isLatin1Only(latin1_only));
    const not_latin1_only = "Héllo, World! \u{3d3}";
    try testing.expect(!isLatin1Only(not_latin1_only));
}

164
deps/zg/src/PropsData.zig vendored Normal file
View File

@@ -0,0 +1,164 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
allocator: mem.Allocator,
// Three independent two-stage bitset tables, one per embedded UCD file:
// core (DerivedCoreProperties.txt), props (PropList.txt), and
// num (DerivedNumericType.txt). See the predicates below for bit layout.
core_s1: []u16 = undefined,
core_s2: []u8 = undefined,
props_s1: []u16 = undefined,
props_s2: []u8 = undefined,
num_s1: []u16 = undefined,
num_s2: []u8 = undefined,
const Self = @This();
/// Decompresses and loads the three embedded property tables.
/// Free with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    // NOTE(review): native-endian reads; assumes the generator matches — confirm.
    const endian = builtin.cpu.arch.endian();
    // Process DerivedCoreProperties.txt
    const core_bytes = @embedFile("core_props");
    var core_fbs = std.io.fixedBufferStream(core_bytes);
    var core_decomp = decompressor(.raw, core_fbs.reader());
    var core_reader = core_decomp.reader();
    var self = Self{ .allocator = allocator };
    const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
    self.core_s1 = try allocator.alloc(u16, core_stage_1_len);
    errdefer allocator.free(self.core_s1);
    for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian);
    const core_stage_2_len: u16 = try core_reader.readInt(u16, endian);
    self.core_s2 = try allocator.alloc(u8, core_stage_2_len);
    errdefer allocator.free(self.core_s2);
    _ = try core_reader.readAll(self.core_s2);
    // Process PropList.txt
    const props_bytes = @embedFile("props");
    var props_fbs = std.io.fixedBufferStream(props_bytes);
    var props_decomp = decompressor(.raw, props_fbs.reader());
    var props_reader = props_decomp.reader();
    const stage_1_len: u16 = try props_reader.readInt(u16, endian);
    self.props_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.props_s1);
    for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian);
    const stage_2_len: u16 = try props_reader.readInt(u16, endian);
    self.props_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.props_s2);
    _ = try props_reader.readAll(self.props_s2);
    // Process DerivedNumericType.txt
    const num_bytes = @embedFile("numeric");
    var num_fbs = std.io.fixedBufferStream(num_bytes);
    var num_decomp = decompressor(.raw, num_fbs.reader());
    var num_reader = num_decomp.reader();
    const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
    self.num_s1 = try allocator.alloc(u16, num_stage_1_len);
    errdefer allocator.free(self.num_s1);
    for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian);
    const num_stage_2_len: u16 = try num_reader.readInt(u16, endian);
    self.num_s2 = try allocator.alloc(u8, num_stage_2_len);
    errdefer allocator.free(self.num_s2);
    _ = try num_reader.readAll(self.num_s2);
    return self;
}
/// Frees all six lookup stages.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.core_s1);
    self.allocator.free(self.core_s2);
    self.allocator.free(self.props_s1);
    self.allocator.free(self.props_s2);
    self.allocator.free(self.num_s1);
    self.allocator.free(self.num_s2);
}
/// True if `cp` is a mathematical symbol (core table, bit 0).
pub fn isMath(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}
/// True if `cp` is an alphabetic character (core table, bit 1).
pub fn isAlphabetic(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}
/// True if `cp` is a valid identifier start character (core table, bit 2).
pub fn isIdStart(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
/// True if `cp` is a valid identifier continuation character (core table, bit 3).
pub fn isIdContinue(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
}
/// True if `cp` is a valid extended identifier start character (core table, bit 4).
pub fn isXidStart(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
}
/// True if `cp` is a valid extended identifier continuation character (core table, bit 5).
pub fn isXidContinue(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
}
/// True if `cp` is a whitespace character (props table, bit 0).
pub fn isWhitespace(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}
/// True if `cp` is a hexadecimal digit (props table, bit 1).
pub fn isHexDigit(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}
/// True if `cp` is a diacritic mark (props table, bit 2).
pub fn isDiacritic(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
/// True if `cp` is numeric (num table, bit 0).
pub fn isNumeric(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}
/// True if `cp` is a digit (num table, bit 1).
pub fn isDigit(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}
/// True if `cp` is decimal (num table, bit 2).
pub fn isDecimal(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
// Note: per the negative asserts below, the Numeric/Digit/Decimal bits
// are mutually exclusive in this data set — a decimal digit like '1'
// reports false for isNumeric and isDigit.
test "Props" {
    const self = try init(testing.allocator);
    defer self.deinit();
    try testing.expect(self.isHexDigit('F'));
    try testing.expect(self.isHexDigit('a'));
    try testing.expect(self.isHexDigit('8'));
    try testing.expect(!self.isHexDigit('z'));
    try testing.expect(self.isDiacritic('\u{301}'));
    try testing.expect(self.isAlphabetic('A'));
    try testing.expect(!self.isAlphabetic('3'));
    try testing.expect(self.isMath('+'));
    try testing.expect(self.isNumeric('\u{277f}'));
    try testing.expect(self.isDigit('\u{2070}'));
    try testing.expect(self.isDecimal('3'));
    try testing.expect(!self.isNumeric('1'));
    try testing.expect(!self.isDigit('2'));
    try testing.expect(!self.isDecimal('g'));
}

228
deps/zg/src/ScriptsData.zig vendored Normal file
View File

@@ -0,0 +1,228 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
/// Unicode script property values (long names); `none` means the table
/// stores no script for the code point.
pub const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
allocator: mem.Allocator,
// Stage 1 of the three-stage lookup (see `script`): indexed by `cp >> 8`,
// yields the base offset of that 256-entry block in `s2`.
s1: []u16 = undefined,
// Stage 2: per-code-point indexes into `s3`.
s2: []u8 = undefined,
// Stage 3: Script enum tag bytes; 0 means "no script".
s3: []u8 = undefined,

const Self = @This();
/// Decompresses the script lookup tables embedded at build time (the
/// "scripts" file) and allocates the three stages with `allocator`.
/// Call `deinit` to release them.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    // The data generator writes integers in native endianness.
    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };
    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(self.s2);
    // NOTE(review): readAll's byte count is discarded, so a truncated stream
    // would leave the tail of s2/s3 undefined — presumably the embedded data
    // is always complete; confirm before relying on this elsewhere.
    _ = try reader.readAll(self.s2);
    // NOTE(review): the s3 length is read as a single u8 even though the
    // variable is u16 (s1/s2 lengths use u16). This only works if the
    // generator wrote one length byte here — verify against the data
    // generator before "fixing" this to u16.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    _ = try reader.readAll(self.s3);
    return self;
}
/// Releases the three stage tables allocated by `init`.
pub fn deinit(self: *const Self) void {
    const alloc = self.allocator;
    alloc.free(self.s3);
    alloc.free(self.s2);
    alloc.free(self.s1);
}
/// Lookup the Script type for `cp`. Returns null when the code point has
/// no script assigned (table byte 0).
pub fn script(self: Self, cp: u21) ?Script {
    // Three-stage table walk: high byte selects a block, low byte selects
    // the entry, which indexes the final tag table.
    const block_base = self.s1[cp >> 8];
    const tag_index = self.s2[block_base + (cp & 0xff)];
    const tag_byte = self.s3[tag_index];
    if (tag_byte == 0) return null;
    return @enumFromInt(tag_byte);
}
test "script" {
    const data = try init(std.testing.allocator);
    defer data.deinit();
    // 'A' (LATIN CAPITAL LETTER A) must map to the Latin script.
    try testing.expectEqual(Script.Latin, data.script('A').?);
}

84
deps/zg/src/WidthData.zig vendored Normal file
View File

@@ -0,0 +1,84 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
const GraphemeData = @import("GraphemeData");
allocator: mem.Allocator,
// Grapheme break data, initialized and owned by this struct.
g_data: GraphemeData,
// Stage 1 of the two-stage lookup (see `codePointWidth`): indexed by
// `cp >> 8`, yields the base offset of that block in `s2`.
s1: []u16 = undefined,
// Stage 2: per-code-point widths in terminal cells; i3 holds the -1..3 range.
s2: []i3 = undefined,

const Self = @This();
/// Decompresses the display-width tables embedded at build time (the "dwp"
/// file) and initializes the contained `GraphemeData`. Call `deinit` to
/// release everything.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();
    // The data generator writes integers in native endianness.
    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .g_data = try GraphemeData.init(allocator),
    };
    errdefer self.g_data.deinit();
    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(i3, stage_2_len);
    errdefer allocator.free(self.s2);
    // Widths are stored as whole i8 bytes; each must fit in i3 (-1..3 is the
    // documented range) or the @intCast trips safety checks in safe builds.
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian));
    return self;
}
/// Releases the lookup tables and the owned `GraphemeData`.
pub fn deinit(self: *const Self) void {
    self.g_data.deinit();
    self.allocator.free(self.s2);
    self.allocator.free(self.s1);
}
/// codePointWidth returns the number of cells `cp` requires when rendered
/// in a fixed-pitch font (i.e. a terminal screen). Values range from -1 to
/// 3: BACKSPACE and DELETE yield -1, the 3-em-dash yields 3, and C0/C1
/// control codes yield 0.
pub fn codePointWidth(self: Self, cp: u21) i3 {
    // Two-stage table walk: high byte selects a block, low byte the entry.
    const block_base = self.s1[cp >> 8];
    const entry = cp & 0xff;
    return self.s2[block_base + entry];
}
test "codePointWidth" {
    // Fix: codePointWidth is a method taking `Self` (it reads the s1/s2
    // tables), but the original test called it as a one-argument free
    // function, which cannot compile. Build a Self from the embedded data
    // and call the method on it.
    const self = try init(testing.allocator);
    defer self.deinit();
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0000)); // null
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x8)); // \b
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x7f)); // DEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0005)); // Cf
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0007)); // \a BEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000A)); // \n LF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000B)); // \v VT
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000C)); // \f FF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000D)); // \r CR
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000E)); // SO
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000F)); // SI
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x070F)); // Cf
    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x0603)); // Cf Arabic
    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00AD)); // soft-hyphen
    try testing.expectEqual(@as(i3, 2), self.codePointWidth(0x2E3A)); // two-em dash
    try testing.expectEqual(@as(i3, 3), self.codePointWidth(0x2E3B)); // three-em dash
    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00BD)); // ambiguous halfwidth
    try testing.expectEqual(@as(i3, 1), self.codePointWidth('é'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('😊'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('统'));
}

33
deps/zg/src/ascii.zig vendored Normal file
View File

@@ -0,0 +1,33 @@
const std = @import("std");
const simd = std.simd;
const testing = std.testing;
/// Returns true if `str` only contains ASCII bytes (all <= 127).
/// Uses SIMD when the target supports it; otherwise falls back to a
/// scalar scan. An empty string is ASCII-only.
pub fn isAsciiOnly(str: []const u8) bool {
    // No usable vector width on this target: plain scalar scan.
    const vec_len = simd.suggestVectorLength(u8) orelse return for (str) |b| {
        if (b > 127) break false;
    } else true;

    const Vec = @Vector(vec_len, u8);
    var remaining = str;

    // The loop exits only via one of the returns below. (A dead
    // `return true;` that followed the loop has been removed.)
    while (true) {
        // Fewer than vec_len bytes left: finish with a scalar scan.
        if (remaining.len < vec_len) return for (remaining) |b| {
            if (b > 127) break false;
        } else true;
        const v1 = remaining[0..vec_len].*;
        const v2: Vec = @splat(127);
        // Any lane above 127 means a non-ASCII byte somewhere in the chunk.
        if (@reduce(.Or, v1 > v2)) return false;
        remaining = remaining[vec_len..];
    }
}
test "isAsciiOnly" {
    // Pure-ASCII input is accepted; one accented character is rejected.
    const pure = "Hello, World! 0123456789 !@#$%^&*()_-=+";
    const accented = "Héllo, World! 0123456789 !@#$%^&*()_-=+";
    try testing.expect(isAsciiOnly(pure));
    try testing.expect(!isAsciiOnly(accented));
}

118
deps/zg/src/code_point.zig vendored Normal file
View File

@@ -0,0 +1,118 @@
const std = @import("std");
/// `CodePoint` represents a Unicode code point by its code,
/// length, and offset in the source bytes.
pub const CodePoint = struct {
    // The decoded scalar value; u21 covers 0x0..0x10FFFF.
    code: u21,
    // Number of UTF-8 bytes consumed from the source (1-4).
    len: u3,
    // Byte offset of the first byte within the source string.
    offset: u32,
};
/// Decodes the code point beginning at `bytes[0]`. `offset` is only copied
/// into the result (for the caller's bookkeeping); it is never used to
/// index `bytes`. Returns null on empty input. An invalid lead byte or a
/// truncated sequence yields U+FFFD (REPLACEMENT CHARACTER) with len 1, so
/// iteration always makes forward progress.
/// NOTE(review): continuation bytes are not validated (no 0b10xxxxxx
/// check), so overlong/malformed sequences can decode to arbitrary values —
/// callers appear to assume valid UTF-8; confirm before using on untrusted
/// input.
pub fn decode(bytes: []const u8, offset: u32) ?CodePoint {
    // EOS fast path
    if (bytes.len == 0) {
        return null;
    }
    // ASCII fast path
    if (bytes[0] < 128) {
        return .{
            .code = bytes[0],
            .len = 1,
            .offset = offset,
        };
    }
    // Sequence length comes from the lead byte's high bits.
    var cp = CodePoint{
        .code = undefined,
        .len = switch (bytes[0]) {
            0b1100_0000...0b1101_1111 => 2,
            0b1110_0000...0b1110_1111 => 3,
            0b1111_0000...0b1111_0111 => 4,
            else => {
                // Invalid lead byte: Unicode replacement code point.
                return .{
                    .code = 0xfffd,
                    .len = 1,
                    .offset = offset,
                };
            },
        },
        .offset = offset,
    };
    // Return replacement if we don't have a complete codepoint remaining;
    // consumes only one byte.
    if (cp.len > bytes.len) {
        // Unicode replacement code point.
        return .{
            .code = 0xfffd,
            .len = 1,
            .offset = offset,
        };
    }
    // Mask the length bits off the lead byte, then fold in six payload bits
    // from each continuation byte.
    const cp_bytes = bytes[0..cp.len];
    cp.code = switch (cp.len) {
        2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),
        3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
            (cp_bytes[1] & 0b00111111)) << 6) |
            (cp_bytes[2] & 0b00111111),
        4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
            (cp_bytes[1] & 0b00111111)) << 6) |
            (cp_bytes[2] & 0b00111111)) << 6) |
            (cp_bytes[3] & 0b00111111),
        else => @panic("CodePointIterator.next invalid code point length."),
    };
    return cp;
}
/// `Iterator` iterates a string one `CodePoint` at-a-time.
pub const Iterator = struct {
    bytes: []const u8,
    i: u32 = 0,

    /// Returns the next code point and advances past it, or null at
    /// end-of-string.
    pub fn next(self: *Iterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;
        // i < bytes.len guarantees a non-empty slice, so decode succeeds.
        const cp = decode(self.bytes[self.i..], self.i) orelse return null;
        self.i += cp.len;
        return cp;
    }

    /// Returns the next code point without consuming it.
    pub fn peek(self: *Iterator) ?CodePoint {
        const mark = self.i;
        defer self.i = mark;
        return self.next();
    }
};
test "decode" {
    // U+1F329 CLOUD WITH LIGHTNING followed by U+FE0F; decode reads only
    // the first code point (four bytes).
    const res = decode("🌩️", 0);
    try std.testing.expect(res != null);
    const cp = res.?;
    try std.testing.expectEqual(@as(u21, 0x1F329), cp.code);
    try std.testing.expectEqual(4, cp.len);
}
test "peek" {
    // peek must not consume: 'i' is seen twice, and null repeats at the end.
    var it = Iterator{ .bytes = "Hi" };
    try std.testing.expectEqual(@as(u21, 'H'), it.next().?.code);
    try std.testing.expectEqual(@as(u21, 'i'), it.peek().?.code);
    try std.testing.expectEqual(@as(u21, 'i'), it.next().?.code);
    try std.testing.expectEqual(@as(?CodePoint, null), it.peek());
    try std.testing.expectEqual(@as(?CodePoint, null), it.next());
}

258
deps/zg/src/grapheme.zig vendored Normal file
View File

@@ -0,0 +1,258 @@
const std = @import("std");
const mem = std.mem;
const unicode = std.unicode;
const CodePoint = @import("code_point").CodePoint;
const CodePointIterator = @import("code_point").Iterator;
pub const GraphemeData = @import("GraphemeData");
/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
pub const Grapheme = struct {
    len: u8,
    offset: u32,

    /// Returns the slice of `src` covered by this grapheme cluster.
    pub fn bytes(self: Grapheme, src: []const u8) []const u8 {
        const start = self.offset;
        return src[start .. start + self.len];
    }
};
/// `Iterator` iterates a string of UTF-8 encoded bytes one grapheme cluster at-a-time.
pub const Iterator = struct {
    // Two-slot lookahead window: buf[0] is the current code point,
    // buf[1] the one after it (null at end-of-string).
    buf: [2]?CodePoint = .{ null, null },
    cp_iter: CodePointIterator,
    data: *const GraphemeData,

    const Self = @This();

    /// Assumes `src` is valid UTF-8.
    pub fn init(str: []const u8, data: *const GraphemeData) Self {
        var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
        // Prime the window so the first next() call sees the first code
        // point in buf[0].
        self.advance();
        return self;
    }

    // Shifts the lookahead window right by one code point.
    fn advance(self: *Self) void {
        self.buf[0] = self.buf[1];
        self.buf[1] = self.cp_iter.next();
    }

    /// Returns the next grapheme cluster, or null when the string is
    /// exhausted.
    pub fn next(self: *Self) ?Grapheme {
        self.advance();
        // If no more
        if (self.buf[0] == null) return null;
        // If last one
        if (self.buf[1] == null) return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        // Fast path: two ASCII code points always break, except after CR
        // (CR LF forms one cluster).
        if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) {
            return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        }
        const gc_start = self.buf[0].?.offset;
        var gc_len: u8 = self.buf[0].?.len;
        // Each cluster starts with fresh break state.
        var state = State{};
        if (graphemeBreak(
            self.buf[0].?.code,
            self.buf[1].?.code,
            self.data,
            &state,
        )) return Grapheme{ .len = gc_len, .offset = gc_start };
        // Accumulate code points until a break is required. At end-of-string
        // 0 is passed as the "next" code point.
        while (true) {
            self.advance();
            if (self.buf[0] == null) break;
            gc_len += self.buf[0].?.len;
            if (graphemeBreak(
                self.buf[0].?.code,
                if (self.buf[1]) |ncp| ncp.code else 0,
                self.data,
                &state,
            )) break;
        }
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }
};
// Predicates
/// True for code points that unconditionally terminate a cluster:
/// CR, LF, and anything with the Control break property.
fn isBreaker(cp: u21, data: *const GraphemeData) bool {
    const gbp_prop = data.gbp(cp);
    return cp == '\r' or cp == '\n' or gbp_prop == .Control;
}
// Grapheme break state, carried across successive `graphemeBreak` calls.
pub const State = struct {
    bits: u3 = 0,

    // One bit per in-progress sequence kind.
    const xpic_bit: u3 = 1; // Extended Pictographic (emoji)
    const regional_bit: u3 = 2; // Regional Indicator (flags)
    const indic_bit: u3 = 4; // Indic Conjunct

    // Note: the unset* methods XOR (toggle) rather than clear; callers
    // only invoke them when the flag is known to be set.
    fn hasXpic(self: State) bool {
        return self.bits & xpic_bit != 0;
    }
    fn setXpic(self: *State) void {
        self.bits |= xpic_bit;
    }
    fn unsetXpic(self: *State) void {
        self.bits ^= xpic_bit;
    }

    fn hasRegional(self: State) bool {
        return self.bits & regional_bit != 0;
    }
    fn setRegional(self: *State) void {
        self.bits |= regional_bit;
    }
    fn unsetRegional(self: *State) void {
        self.bits ^= regional_bit;
    }

    fn hasIndic(self: State) bool {
        return self.bits & indic_bit != 0;
    }
    fn setIndic(self: *State) void {
        self.bits |= indic_bit;
    }
    fn unsetIndic(self: *State) void {
        self.bits ^= indic_bit;
    }
};
/// `graphemeBreak` returns true only if a grapheme break point is required
/// between `cp1` and `cp2`. `state` should start out as 0. If calling
/// iteratively over a sequence of code points, this function must be called
/// IN ORDER on ALL potential breaks in a string.
/// Modeled after the API of utf8proc's `utf8proc_grapheme_break_stateful`.
/// https://github.com/JuliaStrings/utf8proc/blob/2bbb1ba932f727aad1fab14fafdbc89ff9dc4604/utf8proc.h#L599-L617
/// The rule checks below follow UAX #29 ordering; do not reorder them.
pub fn graphemeBreak(
    cp1: u21,
    cp2: u21,
    data: *const GraphemeData,
    state: *State,
) bool {
    // Extract relevant properties.
    const cp1_gbp_prop = data.gbp(cp1);
    const cp1_indic_prop = data.indic(cp1);
    const cp1_is_emoji = data.isEmoji(cp1);
    const cp2_gbp_prop = data.gbp(cp2);
    const cp2_indic_prop = data.indic(cp2);
    const cp2_is_emoji = data.isEmoji(cp2);
    // Track that an Extended Pictographic sequence is in progress (GB11).
    if (!state.hasXpic() and cp1_is_emoji) state.setXpic();
    // Track that an Indic conjunct sequence is in progress (GB9c).
    if (!state.hasIndic() and cp1_indic_prop == .Consonant) state.setIndic();
    // GB3: CR x LF
    if (cp1 == '\r' and cp2 == '\n') return false;
    // GB4: break after Control, CR, or LF.
    if (isBreaker(cp1, data)) return true;
    // GB11: Emoji Extend* ZWJ x Emoji
    if (state.hasXpic() and
        cp1_gbp_prop == .ZWJ and
        cp2_is_emoji)
    {
        state.unsetXpic();
        return false;
    }
    // GB9: x (Extend | ZWJ)
    if (cp2_gbp_prop == .Extend or cp2_gbp_prop == .ZWJ) return false;
    // GB9a: x SpacingMark
    if (cp2_gbp_prop == .SpacingMark) return false;
    // GB9b: Prepend x (unless the right side is a mandatory breaker)
    if (cp1_gbp_prop == .Prepend and !isBreaker(cp2, data)) return false;
    // GB12, GB13: RI x RI — pairs of regional indicators join; the state
    // flag alternates so a third RI starts a new flag.
    if (cp1_gbp_prop == .Regional_Indicator and cp2_gbp_prop == .Regional_Indicator) {
        if (state.hasRegional()) {
            state.unsetRegional();
            return true;
        } else {
            state.setRegional();
            return false;
        }
    }
    // GB6: Hangul L x (L|V|LV|LVT)
    if (cp1_gbp_prop == .L) {
        if (cp2_gbp_prop == .L or
            cp2_gbp_prop == .V or
            cp2_gbp_prop == .LV or
            cp2_gbp_prop == .LVT) return false;
    }
    // GB7: Hangul (LV | V) x (V | T)
    if (cp1_gbp_prop == .LV or cp1_gbp_prop == .V) {
        if (cp2_gbp_prop == .V or
            cp2_gbp_prop == .T) return false;
    }
    // GB8: Hangul (LVT | T) x T
    if (cp1_gbp_prop == .LVT or cp1_gbp_prop == .T) {
        if (cp2_gbp_prop == .T) return false;
    }
    // GB9c: Indic Conjunct Break — consonant followed by extend/linker.
    if (state.hasIndic() and
        cp1_indic_prop == .Consonant and
        (cp2_indic_prop == .Extend or cp2_indic_prop == .Linker))
    {
        return false;
    }
    // GB9c continued: extend followed by linker inside a conjunct.
    if (state.hasIndic() and
        cp1_indic_prop == .Extend and
        cp2_indic_prop == .Linker)
    {
        return false;
    }
    // GB9c continued: linker (or ZWJ) joining onto the next consonant
    // completes the conjunct.
    if (state.hasIndic() and
        (cp1_indic_prop == .Linker or cp1_gbp_prop == .ZWJ) and
        cp2_indic_prop == .Consonant)
    {
        state.unsetIndic();
        return false;
    }
    // GB999: break everywhere else.
    return true;
}
test "Segmentation ZWJ and ZWSP emoji sequences" {
    // BEAR FACE + ZWJ + SNOWFLAKE + VS16 (the "polar bear" ZWJ sequence).
    const seq_1 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const seq_2 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const with_zwj = seq_1 ++ "\u{200D}" ++ seq_2;
    const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
    const no_joiner = seq_1 ++ seq_2;

    const data = try GraphemeData.init(std.testing.allocator);
    defer data.deinit();

    // A ZWJ between the sequences fuses everything into one cluster.
    var iter = Iterator.init(with_zwj, &data);
    var count: usize = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 1), count);

    // A ZWSP does not join: two emoji clusters plus the separator itself.
    iter = Iterator.init(with_zwsp, &data);
    count = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 3), count);

    // Back to back with no joiner: two clusters.
    iter = Iterator.init(no_joiner, &data);
    count = 0;
    while (iter.next()) |_| count += 1;
    try std.testing.expectEqual(@as(usize, 2), count);
}

195
deps/zg/src/unicode_tests.zig vendored Normal file
View File

@@ -0,0 +1,195 @@
const std = @import("std");
const fmt = std.fmt;
const fs = std.fs;
const io = std.io;
const heap = std.heap;
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;
const Grapheme = @import("grapheme").Grapheme;
const GraphemeData = @import("grapheme").GraphemeData;
const GraphemeIterator = @import("grapheme").Iterator;
const Normalize = @import("Normalize");
// Runs the official UCD NormalizationTest.txt conformance file: each line
// is `source;NFC;NFD;NFKC;NFKD;...` where every field is a space-separated
// list of hex code points.
test "Unicode normalization tests" {
    // An arena makes the per-line allocations cheap; everything is released
    // at once by arena.deinit().
    var arena = heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    var allocator = arena.allocator();
    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    const n = Normalize{ .norm_data = &norm_data };
    var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
    defer file.close();
    var buf_reader = io.bufferedReader(file.reader());
    const input_stream = buf_reader.reader();
    var line_no: usize = 0;
    var buf: [4096]u8 = undefined;
    var cp_buf: [4]u8 = undefined;
    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        line_no += 1;
        // Skip comments or empty lines.
        if (line.len == 0 or line[0] == '#' or line[0] == '@') continue;
        // Iterate over fields.
        var fields = mem.split(u8, line, ";");
        var field_index: usize = 0;
        var input: []u8 = undefined;
        // NOTE(review): this defer runs even if field 0 was never parsed,
        // which would free an undefined slice — safe only because every
        // non-skipped line has at least one field.
        defer allocator.free(input);
        while (fields.next()) |field| : (field_index += 1) {
            if (field_index == 0) {
                // Field 0: the source string, UTF-8 encoded from hex
                // code points.
                var i_buf = std.ArrayList(u8).init(allocator);
                defer i_buf.deinit();
                var i_fields = mem.split(u8, field, " ");
                while (i_fields.next()) |s| {
                    const icp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(icp, &cp_buf);
                    try i_buf.appendSlice(cp_buf[0..len]);
                }
                input = try i_buf.toOwnedSlice();
            } else if (field_index == 1) {
                //debug.print("\n*** {s} ***\n", .{line});
                // Field 1: expected NFC of the source.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();
                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }
                const want = w_buf.items;
                var got = try n.nfc(allocator, input);
                defer got.deinit();
                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 2) {
                // Field 2: expected NFD of the source.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();
                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }
                const want = w_buf.items;
                var got = try n.nfd(allocator, input);
                defer got.deinit();
                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 3) {
                // Field 3: expected NFKC of the source.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();
                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }
                const want = w_buf.items;
                var got = try n.nfkc(allocator, input);
                defer got.deinit();
                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 4) {
                // Field 4: expected NFKD of the source.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();
                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }
                const want = w_buf.items;
                const got = try n.nfkd(allocator, input);
                defer got.deinit();
                try testing.expectEqualStrings(want, got.slice);
            } else {
                // Remaining fields (and trailing comment) are not checked.
                continue;
            }
        }
    }
}
// Runs the official UCD GraphemeBreakTest.txt: each line lists code points
// separated by "÷" (break) or "×" (no break) markers; the expected clusters
// are the runs between "÷" separators.
test "Segmentation GraphemeIterator" {
    const allocator = std.testing.allocator;
    var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
    defer file.close();
    var buf_reader = std.io.bufferedReader(file.reader());
    var input_stream = buf_reader.reader();
    const data = try GraphemeData.init(allocator);
    defer data.deinit();
    var buf: [4096]u8 = undefined;
    var line_no: usize = 1;
    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) {
        // Skip comments or empty lines.
        if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue;
        // Clean up: drop the leading "÷ " and the trailing comment.
        // NOTE(review): the trailing trim searches for the literal sequence
        // "÷<TAB>#" — verify this matches the test file's exact layout.
        var line = std.mem.trimLeft(u8, raw, "÷ ");
        if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| {
            line = line[0..octo];
        }
        // Iterate over fields.
        var want = std.ArrayList(Grapheme).init(allocator);
        defer want.deinit();
        var all_bytes = std.ArrayList(u8).init(allocator);
        defer all_bytes.deinit();
        // Each "÷"-separated field is one expected grapheme cluster.
        var graphemes = std.mem.split(u8, line, " ÷ ");
        var bytes_index: u32 = 0;
        while (graphemes.next()) |field| {
            var code_points = std.mem.split(u8, field, " ");
            var cp_buf: [4]u8 = undefined;
            var cp_index: u32 = 0;
            var gc_len: u8 = 0;
            while (code_points.next()) |code_point| {
                // "×" markers inside a cluster carry no code point.
                if (std.mem.eql(u8, code_point, "×")) continue;
                const cp: u21 = try std.fmt.parseInt(u21, code_point, 16);
                const len = try unicode.utf8Encode(cp, &cp_buf);
                try all_bytes.appendSlice(cp_buf[0..len]);
                cp_index += len;
                gc_len += len;
            }
            try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
            bytes_index += cp_index;
        }
        // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items });
        var iter = GraphemeIterator.init(all_bytes.items, &data);
        // Check each expected cluster against the iterator's output.
        for (want.items) |want_gc| {
            const got_gc = (iter.next()).?;
            try std.testing.expectEqualStrings(
                want_gc.bytes(all_bytes.items),
                got_gc.bytes(all_bytes.items),
            );
        }
    }
}

View File

@@ -0,0 +1,92 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<!-- saved from url=(0036)https://www.unicode.org/license.html -->
<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="Content-Language" content="en-us">
<meta name="keywords" content="Unicode Standard, copyright">
<meta name="ProgId" content="FrontPage.Editor.Document">
<title>Unicode License Agreement</title>
<link rel="stylesheet" type="text/css" href="standard_styles.css">
<style type="text/css">
pre {
FONT-FAMILY: Arial, Geneva, sans-serif;
}
</style>
</head>
<body text="#330000">
<table width="100%" cellpadding="0" cellspacing="0" border="0">
<tbody><tr>
<!-- BEGIN CONTENTS -->
<td>
<blockquote>
<h2><a name="License">UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE</a></h2>
<pre>See <a href="https://www.unicode.org/copyright.html">Terms of Use</a> for definitions of Unicode Inc.'s
Data Files and Software.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2021 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
</pre>
</blockquote>
</td>
</tr>
</tbody></table>
</body></html>

View File

@@ -0,0 +1,218 @@
BODY {
margin: 0; COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
div.body { margin: 1em}
P {
COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
TD {
COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
LI {
COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe;
margin-top: 0.25em; margin-bottom: 0.25em
}
A:link {
COLOR: #CC0000; text-decoration:none;
}
A:visited {
COLOR: #880000; text-decoration:none;
}
A:active {
COLOR: green
}
A:hover {
text-decoration:underline;
}
H1 {
FONT-WEIGHT: bold
}
H2 {
FONT-WEIGHT: bold
}
H3 {
FONT-WEIGHT: bold
}
H4 {
FONT-WEIGHT: bold
}
H1 {
MARGIN-TOP: 12px; FONT-SIZE: 180%; FONT-FAMILY: Arial, Geneva, sans-serif; TEXT-ALIGN: center
}
H2 {
MARGIN-TOP: 2em; FONT-SIZE: 120%; LINE-HEIGHT: 100%; FONT-FAMILY: Arial, Geneva, sans-serif
}
H3 {
MARGIN-TOP: 2em; FONT-SIZE: 105%; FONT-FAMILY: Arial, Geneva, sans-serif
}
H4 {
MARGIN-TOP: 2em; FONT-SIZE: 95%; FONT-FAMILY: Arial, Geneva, sans-serif
}
TD.bar {
PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: right
}
P.bar {
PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: right
}
TD.icon {
PADDING-RIGHT: 2px; PADDING-LEFT: 2px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 2px; COLOR: #fffffe; PADDING-TOP: 2px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: left
}
TD.gray {
PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-SIZE: 50%; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; BACKGROUND-COLOR: #999999
}
A.bar {
FONT-SIZE: 100%
}
A.bar:link {
FONT-SIZE: 90%; COLOR: #fffffe
}
A.bar:visited {
FONT-SIZE: 90%; COLOR: #fffffe
}
A.bar:active {
FONT-SIZE: 90%; COLOR: #ff3333
}
A.bar:hover {
FONT-SIZE: 90%; COLOR: #ff3333
}
TD.navCol {
PADDING-RIGHT: 4px; PADDING-LEFT: 4px; PADDING-BOTTOM: 4px; COLOR: #330000; PADDING-TOP: 4px; BACKGROUND-COLOR: #f0e0c0
}
TABLE.navColTable {
COLOR: #330000; BACKGROUND-COLOR: #f0e0c0
}
TD.navColTitle {
PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 95%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: center
}
TD.navColCell {
FONT-SIZE: 90%; BACKGROUND-COLOR: #f0e0c0
}
TD.currentPage {
FONT-WEIGHT: bold; FONT-SIZE: 90%; FONT-STYLE: italic; BACKGROUND-COLOR: #f0e0c0
}
TD.contents {
}
TABLE.sidebar {
PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FLOAT: right; PADDING-BOTTOM: 0px; MARGIN: 4px; MARGIN-LEFT: 1em; WIDTH: 40%; COLOR: #000000; PADDING-TOP: 0px; BACKGROUND-COLOR: #990000
}
TD.sidebarTitle {
PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-WEIGHT: bold; FONT-SIZE: 95%; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000
}
TD.sidebar {
PADDING-RIGHT: 2px; PADDING-LEFT: 2px; FONT-SIZE: 90%; PADDING-BOTTOM: 2px; COLOR: #000000; PADDING-TOP: 2px; BACKGROUND-COLOR: #f0e0c0
}
P.q {
FONT-WEIGHT: bold; FONT-STYLE: italic; FONT-FAMILY: 'Century Schoolbook', serif
}
P.a {
MARGIN-LEFT: 16px; MARGIN-RIGHT: 16px; FONT-FAMILY: 'Century Schoolbook', serif
}
P.source {
FONT-SIZE: 90%; MARGIN-RIGHT: 16px; FONT-FAMILY: 'Century Schoolbook', serif; TEXT-ALIGN: right
}
UNKNOWN {
COLOR: #ffffee
}
TABLE.light {
COLOR: #330000; BACKGROUND-COLOR: #ffffee
}
TD.dark {
FONT-SIZE: 200%; COLOR: #ffffee; FONT-FAMILY: 'Century Schoolbook', serif; BACKGROUND-COLOR: #aa0000
}
H1 {
FONT-SIZE: 150%; MARGIN: 2px 0px; LINE-HEIGHT: 100%; FONT-FAMILY: Arial, Geneva, sans-serif; TEXT-ALIGN: center
}
H4 {
COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; FONT-WEIGHT: bold; FONT-SIZE: 95%; BACKGROUND-COLOR: #fffffe;
margin-bottom:-15px
}
TD.head {
PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-SIZE: 100%; FONT-WEIGHT: bold; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; BACKGROUND-COLOR: #999999
}
UL.one {
FONT-SIZE: 90%; MARGIN-LEFT:20px; TEXT-DECORATION: none; LIST-STYLE-TYPE: none
}
UL.two {
FONT-SIZE: 85%; TEXT-DECORATION: none; TEXT-ALIGN: left; LIST-STYLE-TYPE: none; MARGIN-LEFT:20px
}
span.changedspan { background-color: #FFFF00; border-style: dotted; border-width: 1px }
span.removedspan { text-decoration: line-through; background-color: #FFFF00; border-style: dotted; border-width: 1px }
table.simple { border-width:1px; border-style:solid; border-color:#A0A0A0;
border-collapse:collapse; padding:0.2em; font-size:1em}
table.simple th { border-width:1px; border-style:solid; border-color:#A0A0A0;
font-weight:bold; padding:5px; text-align: left; }
table.simple td {border-width:1px; border-style:solid; border-color:#A0A0A0;
padding:5px; text-align: left; }
table.subtle { border-width:1px; border-style:solid; border-color:#A0A0A0;
border-collapse:collapse; padding:0.2em; font-size:1em}
table.subtle th { border-width:1px; border-style:solid; border-color:#A0A0A0;
font-weight:bold; padding:5px; text-align: left; }
table.subtle td {border-width:1px; border-style:solid; border-color:#A0A0A0;
padding:5px; text-align: left; }
table.subtle-nb { border-style:none; border-width:0; border-collapse:collapse; }
table.subtle-nb th { border:solid 1px #F2F2F2; font-weight:bold; padding:5px; text-align:left; }
table.subtle-nb td { border-style:none; font-weight:normal; padding:5px; text-align:left; }
table.subtle-nb table.subtle th { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle-nb table.subtle td { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle-nb table.simple th { color: #000000; background-color:#FFFFFF; border-width:1px; border-style:solid;
border-color:#A0A0A0; }
table.subtle-nb table.simple td { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle table.simple th { color: #000000; background-color:#FFFFFF; }
table.subtle th { color: #606060; background-color:#FAFAFA;}
table.subtle-nb th { color: #808080; background-color:#F8F8F8;}
table.subtle th p { color: #808080; background-color:#F8F8F8; }
table.subtle-nb tr th p { color: #808080; background-color:#F8F8F8; }
table.simple th p { margin:0; }
table.subtle th p { margin:0; }
table.subtle-nb th p { margin:0; }
/* first-child selector only works in IE if DOCTYPE has a URL (standards mode) */
/* the following remove space above first and below last paragraph (or list) inside a table cell, but preserve interparagraph spacing */
table.simple td>p:first-child { margin: 0; }
table.simple td>p { margin-top: 1.5em; }
table.subtle td>p:first-child { margin:0; }
table.subtle td>p { margin-top:1.5em; }
table.subtle-nb td>p:first-child { margin:0; }
table.subtle-nb td>p { margin-top:1.5em; }
table.simple td>ul:first-child { margin-top:0; margin-bottom:0; }
table.simple td>ol:first-child { margin-top:0; margin-bottom:0; }
table.simple td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.simple td>ol { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle td>ul:first-child { margin-top:0; margin-bottom:0; }
table.subtle td>ol:first-child { margin-top:0; margin-bottom:0; }
table.subtle td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle td>ol { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle-nb td>ul:first-child { margin-top:0; margin-bottom:0; }
table.subtle-nb td>ol:first-child { margin-top:0; margin-bottom:0; }
table.subtle-nb td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle-nb td>ol { margin-top:1.5em; margin-bottom:0.5em; }
/* hanging indent, so wide navcolcell items don't look like they are two entries on wrapping */
/* the value of .6 is chosen to work with navColCells that start with a manually added bullet character */
td.navColCell:first-child { padding-left:0.6em; text-indent: -.6em; }
/* hoisted from reports.css */
.changed { background-color: #FFFF00; border-style: dotted; border-width: 1px; }
.removed { text-decoration: line-through; background-color: #FFFF00; border-style: dotted; border-width: 1px; }
.reviewnote { background-color: #FFFF80; color: #CC0000; border-style: dashed; border-width: 1px; }