init
I will never get tired of vendoring dependencies. ha ha. It is possible I am insane. I had to do a lot of pruning to get these not to be ridiculous (especially the unicode data, which had nearly 1 million lines of... stuff).
deps/zg/LICENSE (vendored, new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Jose Colon Rodriguez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
deps/zg/README.md (vendored, new file, 538 lines)
@@ -0,0 +1,538 @@
# zg

zg provides Unicode text processing for Zig projects.

## Unicode Version

The Unicode version supported by zg is 15.1.0.

## Zig Version

The minimum Zig version required is 0.13.0 stable.

## Integrating zg into your Zig Project

You first need to add zg as a dependency in your `build.zig.zon` file. In your
Zig project's root directory, run:

```plain
zig fetch --save https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz
```

Then instantiate the dependency in your `build.zig`:

```zig
const zg = b.dependency("zg", .{});
```

## A Modular Approach

zg is a modular library. This approach minimizes binary size and memory
requirements by including only the Unicode data required by the modules you
use. The following sections describe the various modules and their specific
use cases.
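For instance, a project that needs only grapheme segmentation and display
width calculation would wire up just those two modules. A minimal sketch,
assuming an `exe` compile step and the `zg` dependency from above (the module
names are the ones documented in the sections below):

```zig
const zg = b.dependency("zg", .{});

// Pull in only the modules this project actually uses; each module
// bundles just the compressed Unicode data it needs.
exe.root_module.addImport("grapheme", zg.module("grapheme"));
exe.root_module.addImport("DisplayWidth", zg.module("DisplayWidth"));
```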
## Code Points

In the `code_point` module, you'll find a data structure representing a single code
point, `CodePoint`, and an `Iterator` to iterate over the code points in a string.

In your `build.zig`:

```zig
exe.root_module.addImport("code_point", zg.module("code_point"));
```

In your code:

```zig
const code_point = @import("code_point");

test "Code point iterator" {
    const str = "Hi 😊";
    var iter = code_point.Iterator{ .bytes = str };
    var i: usize = 0;

    while (iter.next()) |cp| : (i += 1) {
        // The `code` field is the actual code point scalar as a `u21`.
        if (i == 0) try expect(cp.code == 'H');
        if (i == 1) try expect(cp.code == 'i');
        if (i == 2) try expect(cp.code == ' ');

        if (i == 3) {
            try expect(cp.code == '😊');

            // The `offset` field is the byte offset in the
            // source string.
            try expect(cp.offset == 3);

            // The `len` field is the length in bytes of the
            // code point in the source string.
            try expect(cp.len == 4);
        }
    }
}
```

## Grapheme Clusters

Many characters are composed of more than one code point. These are known as
Grapheme Clusters, and the `grapheme` module has a data structure to represent
them, `Grapheme`, and an `Iterator` to iterate over them in a string.

In your `build.zig`:

```zig
exe.root_module.addImport("grapheme", zg.module("grapheme"));
```

In your code:

```zig
const grapheme = @import("grapheme");

test "Grapheme cluster iterator" {
    // We need some Unicode data to process grapheme clusters.
    const gd = try grapheme.GraphemeData.init(allocator);
    defer gd.deinit();

    const str = "He\u{301}"; // Hé
    var iter = grapheme.Iterator.init(str, &gd);

    var i: usize = 0;

    while (iter.next()) |gc| : (i += 1) {
        // The `len` field is the length in bytes of the
        // grapheme cluster in the source string.
        if (i == 0) try expect(gc.len == 1);

        if (i == 1) {
            try expect(gc.len == 3);

            // The `offset` field is the byte offset of the grapheme
            // cluster in the source string.
            try expect(gc.offset == 1);

            // The `bytes` method returns the slice of bytes
            // that comprise this grapheme cluster in the
            // source string `str`.
            try expectEqualStrings("e\u{301}", gc.bytes(str));
        }
    }
}
```

## Unicode General Categories

To detect the general category for a code point, use the `GenCatData` module.

In your `build.zig`:

```zig
exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
```

In your code:

```zig
const GenCatData = @import("GenCatData");

test "General Category" {
    const gcd = try GenCatData.init(allocator);
    defer gcd.deinit();

    // The `gc` method returns the abbreviated General Category.
    // These abbreviations and descriptive comments can be found
    // in the source file `src/GenCatData.zig` as an enum.
    try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter
    try expect(gcd.gc('3') == .Nd); // Nd: decimal number

    // The following are convenience methods for groups of General
    // Categories. For example, all letter categories start with `L`:
    // Lu, Ll, Lt, Lo.
    try expect(gcd.isControl(0));
    try expect(gcd.isLetter('z'));
    try expect(gcd.isMark('\u{301}'));
    try expect(gcd.isNumber('3'));
    try expect(gcd.isPunctuation('['));
    try expect(gcd.isSeparator(' '));
    try expect(gcd.isSymbol('©'));
}
```

## Unicode Properties

You can detect common properties of a code point with the `PropsData` module.

In your `build.zig`:

```zig
exe.root_module.addImport("PropsData", zg.module("PropsData"));
```

In your code:

```zig
const PropsData = @import("PropsData");

test "Properties" {
    const pd = try PropsData.init(allocator);
    defer pd.deinit();

    // Mathematical symbols and letters.
    try expect(pd.isMath('+'));
    // Alphabetic only code points.
    try expect(pd.isAlphabetic('Z'));
    // Space, tab, and other separators.
    try expect(pd.isWhitespace(' '));
    // Hexadecimal digits and variations thereof.
    try expect(pd.isHexDigit('f'));
    try expect(!pd.isHexDigit('z'));

    // Accents, dieresis, and other combining marks.
    try expect(pd.isDiacritic('\u{301}'));

    // Unicode has a specification for valid identifiers like
    // the ones used in programming and regular expressions.
    try expect(pd.isIdStart('Z')); // Identifier start character
    try expect(!pd.isIdStart('1'));
    try expect(pd.isIdContinue('1'));

    // The `X` versions add some code points that can appear after
    // normalizing a string.
    try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character
    try expect(pd.isXidContinue('\u{e33}'));
    try expect(!pd.isXidStart('1'));

    // Note surprising Unicode numeric type properties!
    try expect(pd.isNumeric('\u{277f}'));
    try expect(!pd.isNumeric('3')); // 3 is not numeric!
    try expect(pd.isDigit('\u{2070}'));
    try expect(!pd.isDigit('3')); // 3 is not a digit!
    try expect(pd.isDecimal('3')); // 3 is a decimal digit
}
```

## Letter Case Detection and Conversion

To detect and convert to and from different letter cases, use the `CaseData`
module.

In your `build.zig`:

```zig
exe.root_module.addImport("CaseData", zg.module("CaseData"));
```

In your code:

```zig
const CaseData = @import("CaseData");

test "Case" {
    const cd = try CaseData.init(allocator);
    defer cd.deinit();

    // Upper and lower case.
    try expect(cd.isUpper('A'));
    try expect('A' == cd.toUpper('a'));
    try expect(cd.isLower('a'));
    try expect('a' == cd.toLower('A'));

    // Code points that have case.
    try expect(cd.isCased('É'));
    try expect(!cd.isCased('3'));

    // Case detection and conversion for strings.
    try expect(cd.isUpperStr("HELLO 123!"));
    const ucased = try cd.toUpperStr(allocator, "hello 123");
    defer allocator.free(ucased);
    try expectEqualStrings("HELLO 123", ucased);

    try expect(cd.isLowerStr("hello 123!"));
    const lcased = try cd.toLowerStr(allocator, "HELLO 123");
    defer allocator.free(lcased);
    try expectEqualStrings("hello 123", lcased);
}
```

## Normalization

Unicode normalization is the process of converting a string into a uniform
representation that guarantees a known structure by following a strict set
of rules. There are four normalization forms:

Canonical Composition (NFC)
: The most compact representation, obtained by first applying Canonical
Decomposition and then composing the result.

Compatibility Composition (NFKC)
: The most comprehensive composition, obtained by first applying Compatibility
Decomposition and then composing the result.

Canonical Decomposition (NFD)
: Only code points with canonical decompositions are decomposed. This is a more
compact and faster decomposition, but it will not provide the most
comprehensive normalization possible.

Compatibility Decomposition (NFKD)
: The most comprehensive decomposition method, where both canonical and
compatibility decompositions are performed recursively.
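For example, "é" can be encoded as the single code point U+00E9 or as "e"
followed by the combining acute accent U+0301; NFC always produces the former
and NFD the latter. The ligature "ﬁ" (U+FB01) has no canonical decomposition,
so NFD leaves it alone, while the compatibility forms NFKC and NFKD rewrite it
to the plain letters "fi".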
zg has methods to produce all four normalization forms in the `Normalize` module.

In your `build.zig`:

```zig
exe.root_module.addImport("Normalize", zg.module("Normalize"));
```

In your code:

```zig
const Normalize = @import("Normalize");

test "Normalization" {
    // We need lots of Unicode data for normalization.
    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    defer norm_data.deinit();

    // The `Normalize` structure takes a pointer to the data.
    const n = Normalize{ .norm_data = &norm_data };

    // NFC: Canonical composition
    const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
    defer nfc_result.deinit();
    try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);

    // NFKC: Compatibility composition
    const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
    defer nfkc_result.deinit();
    try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);

    // NFD: Canonical decomposition
    const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}");
    defer nfd_result.deinit();
    try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);

    // NFKD: Compatibility decomposition
    const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
    defer nfkd_result.deinit();
    try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);

    // Test for equality of two strings after normalizing to NFC.
    try expect(try n.eql(allocator, "foé", "foe\u{0301}"));
    try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
}
```

## Caseless Matching via Case Folding

Unicode provides a more efficient way of comparing strings while ignoring letter
case differences: case folding. When you case fold a string, it's converted into a
normalized case form suitable for efficient matching. Use the `CaseFold` module
for this.

In your `build.zig`:

```zig
exe.root_module.addImport("Normalize", zg.module("Normalize"));
exe.root_module.addImport("CaseFold", zg.module("CaseFold"));
```

In your code:

```zig
const Normalize = @import("Normalize");
const CaseFold = @import("CaseFold");

test "Caseless matching" {
    // We need to normalize during the matching process.
    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };

    // We need Unicode case fold data.
    const cfd = try CaseFold.FoldData.init(allocator);
    defer cfd.deinit();

    // The `CaseFold` structure takes a pointer to the data.
    const cf = CaseFold{ .fold_data = &cfd };

    // `compatCaselessMatch` provides the deepest level of caseless
    // matching because it decomposes fully to NFKD.
    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try expect(try cf.compatCaselessMatch(allocator, &n, a, b));

    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try expect(try cf.compatCaselessMatch(allocator, &n, a, c));

    // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch`
    // because it only decomposes to NFD. Naturally, it's faster because of this.
    try expect(!try cf.canonCaselessMatch(allocator, &n, a, b));
    try expect(try cf.canonCaselessMatch(allocator, &n, a, c));
}
```

## Display Width of Characters and Strings

When displaying text with a fixed-width font on a terminal screen, it's very
important to know exactly how many columns or cells each character should take.
Most characters will use one column, but there are many, like emoji and East
Asian ideographs, that need more space. The `DisplayWidth` module provides
methods for this purpose. It also has methods that use the display width
calculation to `center`, `padLeft`, `padRight`, and `wrap` text.

In your `build.zig`:

```zig
exe.root_module.addImport("DisplayWidth", zg.module("DisplayWidth"));
```

In your code:

```zig
const DisplayWidth = @import("DisplayWidth");

test "Display width" {
    // We need Unicode data for display width calculation.
    const dwd = try DisplayWidth.DisplayWidthData.init(allocator);
    defer dwd.deinit();

    // The `DisplayWidth` structure takes a pointer to the data.
    const dw = DisplayWidth{ .data = &dwd };

    // String display width
    try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
    try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
    try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
    try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
    try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));

    // Centering text
    const centered = try dw.center(allocator, "w😊w", 10, "-");
    defer allocator.free(centered);
    try expectEqualStrings("---w😊w---", centered);

    // Pad left
    const right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
    defer allocator.free(right_aligned);
    try expectEqualStrings("******abc", right_aligned);

    // Pad right
    const left_aligned = try dw.padRight(allocator, "abc", 9, "*");
    defer allocator.free(left_aligned);
    try expectEqualStrings("abc******", left_aligned);

    // Wrap text
    const input = "The quick brown fox\r\njumped over the lazy dog!";
    const wrapped = try dw.wrap(allocator, input, 10, 3);
    defer allocator.free(wrapped);
    const want =
        \\The quick
        \\brown fox
        \\jumped
        \\over the
        \\lazy dog!
    ;
    try expectEqualStrings(want, wrapped);
}
```

## Scripts

Unicode categorizes code points by the Script to which they belong. A Script
collects letters and other symbols that belong to a particular writing system.
You can detect the Script for a code point with the `ScriptsData` module.

In your `build.zig`:

```zig
exe.root_module.addImport("ScriptsData", zg.module("ScriptsData"));
```

In your code:

```zig
const ScriptsData = @import("ScriptsData");

test "Scripts" {
    const sd = try ScriptsData.init(allocator);
    defer sd.deinit();

    // To see the full list of Scripts, look at the
    // `src/ScriptsData.zig` file. They are listed in an enum.
    try expect(sd.script('A') == .Latin);
    try expect(sd.script('Ω') == .Greek);
    try expect(sd.script('צ') == .Hebrew);
}
```

## Relation to Ziglyph

zg is a complete rewrite of some of the components of Ziglyph. The goal was to
reduce binary size and improve performance. These goals were achieved by using
trie-like data structures (inspired by [Ghostty's implementation](https://mitchellh.com/writing/ghostty-devlog-006))
instead of generated functions. Where Ziglyph uses a function call, zg uses an
array lookup, which is considerably faster. In addition, all of zg's data
structures are loaded at runtime from compressed versions embedded in the
binary. This allows for smaller binary sizes at the expense of an increased
memory footprint at runtime.
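In essence, each property table is a two-stage lookup: stage 1 maps a code
point's 256-entry block to an offset into stage 2, which stores the
deduplicated blocks of values. A minimal sketch of the idea, with hypothetical
names rather than zg's actual API:

```zig
fn TwoStageTable(comptime V: type) type {
    const block_size = 256;

    return struct {
        stage1: []const u16, // block index -> offset into stage2
        stage2: []const V, // deduplicated 256-entry blocks of values

        // A property lookup is two array indexes instead of a function call.
        fn get(self: @This(), cp: u21) V {
            return self.stage2[self.stage1[cp / block_size] + cp % block_size];
        }
    };
}
```

The codegen programs later in this commit build exactly these stage1/stage2
arrays and serialize them deflate-compressed.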
Benchmarks demonstrate that these goals have been met:

```plain
Binary sizes =======

149K ziglyph_case
87K zg_case

275K ziglyph_caseless
168K zg_caseless

68K ziglyph_codepoint
68K zg_codepoint

101K ziglyph_grapheme
86K zg_grapheme

185K ziglyph_normalizer
152K zg_normalize

101K ziglyph_width
86K zg_width

Benchmarks ==========

Ziglyph toUpperStr/toLowerStr: result: 7911596, took: 80
Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17
zg toUpperStr/toLowerStr: result: 7911596, took: 62
zg isUpperStr/isLowerStr: result: 110959, took: 7

Ziglyph Normalizer.eqlCaseless: result: 625, took: 500
zg CaseFold.canonCaselessMatch: result: 625, took: 385
zg CaseFold.compatCaselessMatch: result: 625, took: 593

Ziglyph CodePointIterator: result: 3769314, took: 2
zg CodePointIterator: result: 3769314, took: 3

Ziglyph GraphemeIterator: result: 3691806, took: 48
zg GraphemeIterator: result: 3691806, took: 16

Ziglyph Normalizer.nfkc: result: 3934162, took: 416
zg Normalize.nfkc: result: 3934162, took: 182

Ziglyph Normalizer.nfc: result: 3955798, took: 57
zg Normalize.nfc: result: 3955798, took: 28

Ziglyph Normalizer.nfkd: result: 4006398, took: 172
zg Normalize.nfkd: result: 4006398, took: 104

Ziglyph Normalizer.nfd: result: 4028034, took: 169
zg Normalize.nfd: result: 4028034, took: 104

Ziglyph Normalizer.eql: result: 625, took: 337
zg Normalize.eql: result: 625, took: 53

Ziglyph display_width.strWidth: result: 3700914, took: 71
zg DisplayWidth.strWidth: result: 3700914, took: 24
```

These results were obtained on an M1 Mac with 16 GiB of RAM.

In contrast to Ziglyph, zg does not have:

- Word segmentation
- Sentence segmentation
- Collation

It's possible that missing functionality will be added in future versions, but
only if there is enough demand from the community.
deps/zg/UNICODE_VERSION.txt (vendored, new file, 1 line)
@@ -0,0 +1 @@
This software is compatible with Unicode version 15.1.0
deps/zg/build.zig (vendored, new file, 337 lines)
@@ -0,0 +1,337 @@
const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Code generation
    // Grapheme break
    const gbp_gen_exe = b.addExecutable(.{
        .name = "gbp",
        .root_source_file = b.path("codegen/gbp.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe);
    const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z");

    // Display width
    const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2).") orelse false;
    const options = b.addOptions();
    options.addOption(bool, "cjk", cjk);

    const dwp_gen_exe = b.addExecutable(.{
        .name = "dwp",
        .root_source_file = b.path("codegen/dwp.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    dwp_gen_exe.root_module.addOptions("options", options);
    const run_dwp_gen_exe = b.addRunArtifact(dwp_gen_exe);
    const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z");

    // Normalization properties
    const canon_gen_exe = b.addExecutable(.{
        .name = "canon",
        .root_source_file = b.path("codegen/canon.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe);
    const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z");

    const compat_gen_exe = b.addExecutable(.{
        .name = "compat",
        .root_source_file = b.path("codegen/compat.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_compat_gen_exe = b.addRunArtifact(compat_gen_exe);
    const compat_gen_out = run_compat_gen_exe.addOutputFileArg("compat.bin.z");

    const hangul_gen_exe = b.addExecutable(.{
        .name = "hangul",
        .root_source_file = b.path("codegen/hangul.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_hangul_gen_exe = b.addRunArtifact(hangul_gen_exe);
    const hangul_gen_out = run_hangul_gen_exe.addOutputFileArg("hangul.bin.z");

    const normp_gen_exe = b.addExecutable(.{
        .name = "normp",
        .root_source_file = b.path("codegen/normp.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_normp_gen_exe = b.addRunArtifact(normp_gen_exe);
    const normp_gen_out = run_normp_gen_exe.addOutputFileArg("normp.bin.z");

    const ccc_gen_exe = b.addExecutable(.{
        .name = "ccc",
        .root_source_file = b.path("codegen/ccc.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_ccc_gen_exe = b.addRunArtifact(ccc_gen_exe);
    const ccc_gen_out = run_ccc_gen_exe.addOutputFileArg("ccc.bin.z");

    const gencat_gen_exe = b.addExecutable(.{
        .name = "gencat",
        .root_source_file = b.path("codegen/gencat.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe);
    const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.bin.z");

    const fold_gen_exe = b.addExecutable(.{
        .name = "fold",
        .root_source_file = b.path("codegen/fold.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_fold_gen_exe = b.addRunArtifact(fold_gen_exe);
    const fold_gen_out = run_fold_gen_exe.addOutputFileArg("fold.bin.z");

    // Numeric types
    const num_gen_exe = b.addExecutable(.{
        .name = "numeric",
        .root_source_file = b.path("codegen/numeric.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_num_gen_exe = b.addRunArtifact(num_gen_exe);
    const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.bin.z");

    // Letter case properties
    const case_prop_gen_exe = b.addExecutable(.{
        .name = "case_prop",
        .root_source_file = b.path("codegen/case_prop.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_case_prop_gen_exe = b.addRunArtifact(case_prop_gen_exe);
    const case_prop_gen_out = run_case_prop_gen_exe.addOutputFileArg("case_prop.bin.z");

    // Uppercase mappings
    const upper_gen_exe = b.addExecutable(.{
        .name = "upper",
        .root_source_file = b.path("codegen/upper.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_upper_gen_exe = b.addRunArtifact(upper_gen_exe);
    const upper_gen_out = run_upper_gen_exe.addOutputFileArg("upper.bin.z");

    // Lowercase mappings
    const lower_gen_exe = b.addExecutable(.{
        .name = "lower",
        .root_source_file = b.path("codegen/lower.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe);
    const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z");

    const scripts_gen_exe = b.addExecutable(.{
        .name = "scripts",
        .root_source_file = b.path("codegen/scripts.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe);
    const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z");

    const core_gen_exe = b.addExecutable(.{
        .name = "core",
        .root_source_file = b.path("codegen/core_props.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_core_gen_exe = b.addRunArtifact(core_gen_exe);
    const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z");

    const props_gen_exe = b.addExecutable(.{
        .name = "props",
        .root_source_file = b.path("codegen/props.zig"),
        .target = b.host,
        .optimize = .Debug,
    });
    const run_props_gen_exe = b.addRunArtifact(props_gen_exe);
    const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z");

    // Modules we provide
    // Code points
    const code_point = b.addModule("code_point", .{
        .root_source_file = b.path("src/code_point.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Grapheme clusters
    const grapheme_data = b.createModule(.{
        .root_source_file = b.path("src/GraphemeData.zig"),
        .target = target,
        .optimize = optimize,
    });
    grapheme_data.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out });

    const grapheme = b.addModule("grapheme", .{
        .root_source_file = b.path("src/grapheme.zig"),
        .target = target,
        .optimize = optimize,
    });
    grapheme.addImport("code_point", code_point);
    grapheme.addImport("GraphemeData", grapheme_data);

    // ASCII utilities
    const ascii = b.addModule("ascii", .{
        .root_source_file = b.path("src/ascii.zig"),
        .target = target,
        .optimize = optimize,
    });

    // Fixed pitch font display width
    const width_data = b.createModule(.{
        .root_source_file = b.path("src/WidthData.zig"),
        .target = target,
        .optimize = optimize,
    });
    width_data.addAnonymousImport("dwp", .{ .root_source_file = dwp_gen_out });
    width_data.addImport("GraphemeData", grapheme_data);

    const display_width = b.addModule("DisplayWidth", .{
        .root_source_file = b.path("src/DisplayWidth.zig"),
        .target = target,
        .optimize = optimize,
    });
    display_width.addImport("ascii", ascii);
    display_width.addImport("code_point", code_point);
    display_width.addImport("grapheme", grapheme);
    display_width.addImport("DisplayWidthData", width_data);

    // Normalization
    const ccc_data = b.createModule(.{
        .root_source_file = b.path("src/CombiningData.zig"),
        .target = target,
        .optimize = optimize,
    });
    ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out });

    const canon_data = b.createModule(.{
        .root_source_file = b.path("src/CanonData.zig"),
        .target = target,
        .optimize = optimize,
    });
    canon_data.addAnonymousImport("canon", .{ .root_source_file = canon_gen_out });

    const compat_data = b.createModule(.{
        .root_source_file = b.path("src/CompatData.zig"),
        .target = target,
        .optimize = optimize,
    });
    compat_data.addAnonymousImport("compat", .{ .root_source_file = compat_gen_out });

    const hangul_data = b.createModule(.{
        .root_source_file = b.path("src/HangulData.zig"),
        .target = target,
        .optimize = optimize,
    });
    hangul_data.addAnonymousImport("hangul", .{ .root_source_file = hangul_gen_out });

    const normp_data = b.createModule(.{
        .root_source_file = b.path("src/NormPropsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    normp_data.addAnonymousImport("normp", .{ .root_source_file = normp_gen_out });

    const norm_data = b.createModule(.{
        .root_source_file = b.path("src/NormData.zig"),
        .target = target,
        .optimize = optimize,
    });
    norm_data.addImport("CanonData", canon_data);
    norm_data.addImport("CombiningData", ccc_data);
    norm_data.addImport("CompatData", compat_data);
    norm_data.addImport("HangulData", hangul_data);
    norm_data.addImport("NormPropsData", normp_data);

    const norm = b.addModule("Normalize", .{
        .root_source_file = b.path("src/Normalize.zig"),
        .target = target,
        .optimize = optimize,
    });
    norm.addImport("ascii", ascii);
    norm.addImport("code_point", code_point);
    norm.addImport("NormData", norm_data);

    // General Category
    const gencat_data = b.addModule("GenCatData", .{
        .root_source_file = b.path("src/GenCatData.zig"),
        .target = target,
        .optimize = optimize,
    });
    gencat_data.addAnonymousImport("gencat", .{ .root_source_file = gencat_gen_out });

    // Case folding
    const fold_data = b.createModule(.{
        .root_source_file = b.path("src/FoldData.zig"),
        .target = target,
        .optimize = optimize,
    });
    fold_data.addAnonymousImport("fold", .{ .root_source_file = fold_gen_out });

    const case_fold = b.addModule("CaseFold", .{
        .root_source_file = b.path("src/CaseFold.zig"),
        .target = target,
        .optimize = optimize,
    });
    case_fold.addImport("ascii", ascii);
    case_fold.addImport("FoldData", fold_data);
    case_fold.addImport("Normalize", norm);

    // Letter case
    const case_data = b.addModule("CaseData", .{
        .root_source_file = b.path("src/CaseData.zig"),
        .target = target,
        .optimize = optimize,
    });
    case_data.addImport("code_point", code_point);
    case_data.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out });
    case_data.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out });
    case_data.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out });

    // Scripts
    const scripts_data = b.addModule("ScriptsData", .{
        .root_source_file = b.path("src/ScriptsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out });

    // Properties
    const props_data = b.addModule("PropsData", .{
        .root_source_file = b.path("src/PropsData.zig"),
        .target = target,
        .optimize = optimize,
    });
    props_data.addAnonymousImport("core_props", .{ .root_source_file = core_gen_out });
    props_data.addAnonymousImport("props", .{ .root_source_file = props_gen_out });
    props_data.addAnonymousImport("numeric", .{ .root_source_file = num_gen_out });

    // Unicode Tests
    const unicode_tests = b.addTest(.{
        .root_source_file = b.path("src/unicode_tests.zig"),
        .target = target,
        .optimize = optimize,
    });
    unicode_tests.root_module.addImport("grapheme", grapheme);
    unicode_tests.root_module.addImport("Normalize", norm);

    const run_unicode_tests = b.addRunArtifact(unicode_tests);

    const unicode_test_step = b.step("unicode-test", "Run Unicode tests");
    unicode_test_step.dependOn(&run_unicode_tests.step);
}
deps/zg/build.zig.zon (vendored, new file, 17 lines)
@@ -0,0 +1,17 @@
.{
    .name = "zg",
    .version = "0.13.1",
    .minimum_zig_version = "0.13.0",

    .paths = .{
        "build.zig",
        "build.zig.zon",
        "codegen",
        "data",
        "LICENSE",
        "README.md",
        "src",
        "unicode_license",
        "UNICODE_VERSION.txt",
    },
}
deps/zg/codegen/canon.zig (vendored, new file, 67 lines)
@@ -0,0 +1,67 @@
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cps: [3]u24 = undefined;
        var len: u8 = 2;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                5 => {
                    // Not canonical.
                    if (field.len == 0 or field[0] == '<') continue :lines;
                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                },

                2 => if (line[0] == '<') continue :lines,

                else => {},
            }
        }

        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}
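After the deflate layer, canon.bin.z is a stream of length-prefixed records: a
`u8` count (2 or 3) followed by that many `u24` code points, where the first
decomposes canonically into the rest, terminated by a `u16` zero. A minimal
sketch of a consumer for the already-inflated bytes (hypothetical names; zg's
actual loader is `src/CanonData.zig`, which is not part of this commit):

```zig
const std = @import("std");
const builtin = @import("builtin");

/// Sketch: walk the (already inflated) canon table emitted above.
/// Each record is a u8 length followed by that many u24 code points;
/// a zero byte (the low byte of the u16 terminator) ends the stream.
fn eachCanonRecord(bytes: []const u8) !void {
    var fbs = std.io.fixedBufferStream(bytes);
    const reader = fbs.reader();
    const endian = builtin.cpu.arch.endian();

    while (true) {
        const len = try reader.readInt(u8, endian);
        if (len == 0) break;
        var cps: [3]u24 = undefined;
        for (cps[0..len]) |*cp| cp.* = try reader.readInt(u24, endian);
        // cps[0] canonically decomposes to cps[1..len].
        std.debug.print("{x} -> {any}\n", .{ cps[0], cps[1..len] });
    }
}
```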
deps/zg/codegen/case_prop.zig (vendored, new file, 135 lines)
@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Props
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Lowercase")) bit = 1;
                    if (mem.eql(u8, field, "Uppercase")) bit = 2;
                    if (mem.eql(u8, field, "Cased")) bit = 4;

                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
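The stage1/stage2 serialization reads back just as directly: two
length-prefixed arrays followed by a constant-time lookup. A minimal sketch
over already-inflated bytes (hypothetical names; zg's actual consumer is
`src/CaseData.zig`, not included in this commit):

```zig
const std = @import("std");
const builtin = @import("builtin");

/// Hypothetical loader for the case_prop table; bit 1 = Lowercase,
/// bit 2 = Uppercase, bit 4 = Cased, mirroring the generator above.
fn lookupCaseBits(allocator: std.mem.Allocator, bytes: []const u8, cp: u21) !u8 {
    var fbs = std.io.fixedBufferStream(bytes);
    const reader = fbs.reader();
    const endian = builtin.cpu.arch.endian();

    // Stage 1: block index -> offset into stage 2.
    const stage1 = try allocator.alloc(u16, try reader.readInt(u16, endian));
    defer allocator.free(stage1);
    for (stage1) |*v| v.* = try reader.readInt(u16, endian);

    // Stage 2: deduplicated 256-entry blocks of property bits.
    const stage2 = try allocator.alloc(u8, try reader.readInt(u16, endian));
    defer allocator.free(stage2);
    _ = try reader.readAll(stage2);

    // The trie-like lookup: two array indexes, no function call.
    return stage2[stage1[cp / 256] + cp % 256];
}
```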
deps/zg/codegen/ccc.zig (vendored, new file, 124 lines)
@@ -0,0 +1,124 @@
const std = @import("std");
const builtin = @import("builtin");

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCombiningClass.txt
    var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{});
    defer cc_file.close();
    var cc_buf = std.io.bufferedReader(cc_file.reader());
    const cc_reader = cc_buf.reader();

    while (try cc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Combining Class
                    if (std.mem.eql(u8, field, "0")) continue;
                    const cc = try std.fmt.parseInt(u8, field, 10);
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), cc);
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const cc = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = cc;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
deps/zg/codegen/compat.zig (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cps: [19]u24 = undefined;
        var len: u8 = 1;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                5 => {
                    // Not compatibility.
                    if (field.len == 0 or field[0] != '<') continue :lines;
                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // <compat type>

                    while (cp_iter.next()) |cp_str| : (len += 1) {
                        cps[len] = try std.fmt.parseInt(u24, cp_str, 16);
                    }
                },

                2 => if (line[0] == '<') continue :lines,

                else => {},
            }
        }

        try writer.writeInt(u8, @intCast(len), endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}
deps/zg/codegen/core_props.zig (vendored, new file, 138 lines)
@@ -0,0 +1,138 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Math")) bit = 1;
                    if (mem.eql(u8, field, "Alphabetic")) bit = 2;
                    if (mem.eql(u8, field, "ID_Start")) bit = 4;
                    if (mem.eql(u8, field, "ID_Continue")) bit = 8;
                    if (mem.eql(u8, field, "XID_Start")) bit = 16;
                    if (mem.eql(u8, field, "XID_Continue")) bit = 32;

                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
deps/zg/codegen/dwp.zig (vendored, new file, 244 lines)
@@ -0,0 +1,244 @@
const std = @import("std");
const builtin = @import("builtin");

const options = @import("options");

const block_size = 256;
const Block = [block_size]i3;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(i3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, i3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedEastAsianWidth.txt
    var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{});
    defer deaw_file.close();
    var deaw_buf = std.io.bufferedReader(deaw_file.reader());
    const deaw_reader = deaw_buf.reader();

    while (try deaw_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        // @missing ranges
        if (std.mem.startsWith(u8, line, "# @missing: ")) {
            const semi = std.mem.indexOfScalar(u8, line, ';').?;
            const field = line[12..semi];
            const dots = std.mem.indexOf(u8, field, "..").?;
            const from = try std.fmt.parseInt(u21, field[0..dots], 16);
            const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
            if (from == 0 and to == 0x10ffff) continue;
            for (from..to + 1) |cp| try flat_map.put(@intCast(cp), 2);
            continue;
        }

        if (line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Width
                    if (std.mem.eql(u8, field, "W") or
                        std.mem.eql(u8, field, "F") or
                        (options.cjk and std.mem.eql(u8, field, "A")))
                    {
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 2);
                    }
                },
                else => {},
            }
        }
    }

    // Process DerivedGeneralCategory.txt
    var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer dgc_file.close();
    var dgc_buf = std.io.bufferedReader(dgc_file.reader());
    const dgc_reader = dgc_buf.reader();

    while (try dgc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    if (std.mem.eql(u8, field, "Mn")) {
                        // Nonspacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Me")) {
                        // Enclosing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Mc")) {
                        // Spacing_Mark
                        for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                    } else if (std.mem.eql(u8, field, "Cf")) {
                        if (std.mem.indexOf(u8, line, "ARABIC") == null) {
                            // Format except Arabic
                            for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), 0);
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(i3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]i3{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        var width = flat_map.get(cp) orelse 1;

        // Specific overrides
        switch (cp) {
            // Three-em dash
            0x2e3b => width = 3,

            // C0/C1 control codes
            0...0x20,
            0x80...0xa0,

            // Line separator
            0x2028,

            // Paragraph separator
            0x2029,

            // Hangul syllable and ignorable.
            0x1160...0x11ff,
            0xd7b0...0xd7ff,
            0x2060...0x206f,
            0xfff0...0xfff8,
            0xe0000...0xe0fff,
            => width = 0,

            // Two-em dash
            0x2e3a,

            // Regional indicators
            0x1f1e6...0x1f200,

            // CJK Blocks
            0x3400...0x4dbf, // CJK Unified Ideographs Extension A
            0x4e00...0x9fff, // CJK Unified Ideographs
            0xf900...0xfaff, // CJK Compatibility Ideographs
            0x20000...0x2fffd, // Plane 2
            0x30000...0x3fffd, // Plane 3
            => width = 2,

            else => {},
        }

        // ASCII
        if (0x20 <= cp and cp < 0x7f) width = 1;

        // Soft hyphen
        if (cp == 0xad) width = 1;

        // Backspace and delete
        if (cp == 0x8 or cp == 0x7f) width = -1;

        // Process block
        block[block_len] = width;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
|
||||
const writer = out_comp.writer();
|
||||
|
||||
const endian = builtin.cpu.arch.endian();
|
||||
try writer.writeInt(u16, @intCast(stage1.items.len), endian);
|
||||
for (stage1.items) |i| try writer.writeInt(u16, i, endian);
|
||||
|
||||
try writer.writeInt(u16, @intCast(stage2.items.len), endian);
|
||||
for (stage2.items) |i| try writer.writeInt(i8, i, endian);
|
||||
|
||||
try out_comp.flush();
|
||||
}
|
252
deps/zg/codegen/fold.zig
vendored
Normal file
@@ -0,0 +1,252 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();

    // Process DerivedCoreProperties.txt
    var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer props_file.close();
    var props_buf = std.io.bufferedReader(props_file.reader());
    const props_reader = props_buf.reader();

    var props_map = std.AutoHashMap(u21, void).init(allocator);
    defer props_map.deinit();

    var line_buf: [4096]u8 = undefined;

    props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines;
                    for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {});
                },
                else => {},
            }
        }
    }

    var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
    defer codepoint_mapping.deinit();

    // Process CaseFolding.txt
    var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
    defer cp_file.close();
    var cp_buf = std.io.bufferedReader(cp_file.reader());
    const cp_reader = cp_buf.reader();

    while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        var field_it = std.mem.splitScalar(u8, line, ';');
        const codepoint_str = field_it.first();
        const codepoint = try std.fmt.parseUnsigned(u21, codepoint_str, 16);

        const status = std.mem.trim(u8, field_it.next() orelse continue, " ");
        // Only interested in 'common' and 'full'
        if (status[0] != 'C' and status[0] != 'F') continue;

        const mapping = std.mem.trim(u8, field_it.next() orelse continue, " ");
        var mapping_it = std.mem.splitScalar(u8, mapping, ' ');
        var mapping_buf = [_]u21{0} ** 3;
        var mapping_i: u8 = 0;
        while (mapping_it.next()) |mapping_c| {
            mapping_buf[mapping_i] = try std.fmt.parseInt(u21, mapping_c, 16);
            mapping_i += 1;
        }

        try codepoint_mapping.putNoClobber(codepoint, mapping_buf);
    }

    var changes_when_casefolded_exceptions = std.ArrayList(u21).init(allocator);
    defer changes_when_casefolded_exceptions.deinit();

    {
        // Codepoints with a case fold mapping can be missing the Changes_When_Casefolded property,
        // but not vice versa.
        for (codepoint_mapping.keys()) |codepoint| {
            if (props_map.get(codepoint) == null) {
                try changes_when_casefolded_exceptions.append(codepoint);
            }
        }
    }

    var offset_to_index = std.AutoHashMap(i32, u8).init(allocator);
    defer offset_to_index.deinit();
    var unique_offsets = std.AutoArrayHashMap(i32, u32).init(allocator);
    defer unique_offsets.deinit();

    // First pass
    {
        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len == 1) {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const result = try unique_offsets.getOrPut(offset);
                if (!result.found_existing) result.value_ptr.* = 0;
                result.value_ptr.* += 1;
            }
        }

        // A codepoint mapping to itself (offset=0) is the most common case
        try unique_offsets.put(0, 0x10FFFF);
        const C = struct {
            vals: []u32,

            pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
                return ctx.vals[a_index] > ctx.vals[b_index];
            }
        };
        unique_offsets.sort(C{ .vals = unique_offsets.values() });

        var offset_it = unique_offsets.iterator();
        var offset_index: u7 = 0;
        while (offset_it.next()) |entry| {
            try offset_to_index.put(entry.key_ptr.*, offset_index);
            offset_index += 1;
        }
    }

    var mappings_to_index = std.AutoArrayHashMap([3]u21, u8).init(allocator);
    defer mappings_to_index.deinit();
    var codepoint_to_index = std.AutoHashMap(u21, u8).init(allocator);
    defer codepoint_to_index.deinit();

    // Second pass
    {
        var count_multiple_codepoints: u8 = 0;

        var it = codepoint_mapping.iterator();
        while (it.next()) |entry| {
            const codepoint = entry.key_ptr.*;
            const mappings = std.mem.sliceTo(entry.value_ptr, 0);
            if (mappings.len > 1) {
                const result = try mappings_to_index.getOrPut(entry.value_ptr.*);
                if (!result.found_existing) {
                    // The high bit marks this as an index into the multi-codepoint
                    // mappings table rather than a single-codepoint offset.
                    result.value_ptr.* = 0x80 | count_multiple_codepoints;
                    count_multiple_codepoints += 1;
                }
                const index = result.value_ptr.*;
                try codepoint_to_index.put(codepoint, index);
            } else {
                const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint);
                const index = offset_to_index.get(offset).?;
                try codepoint_to_index.put(codepoint, index);
            }
        }
    }

    // Build the stage1/stage2/stage3 arrays and output them
    {
        const Block = [256]u8;
        var stage2_blocks = std.AutoArrayHashMap(Block, void).init(allocator);
        defer stage2_blocks.deinit();

        const empty_block: Block = [_]u8{0} ** 256;
        try stage2_blocks.put(empty_block, {});
        const stage1_len = (0x10FFFF / 256) + 1;
        var stage1: [stage1_len]u8 = undefined;

        var codepoint: u21 = 0;
        var block: Block = undefined;
        while (codepoint <= 0x10FFFF) {
            const data_index = codepoint_to_index.get(codepoint) orelse 0;
            block[codepoint % 256] = data_index;

            codepoint += 1;
            if (codepoint % 256 == 0) {
                const result = try stage2_blocks.getOrPut(block);
                const index = result.index;
                stage1[(codepoint >> 8) - 1] = @intCast(index);
            }
        }

        const last_meaningful_block = std.mem.lastIndexOfNone(u8, &stage1, "\x00").?;
        const meaningful_stage1 = stage1[0 .. last_meaningful_block + 1];
        const codepoint_cutoff = (last_meaningful_block + 1) << 8;
        const multiple_codepoint_start: usize = unique_offsets.count();

        var index: usize = 0;
        const stage3_elems = unique_offsets.count() + mappings_to_index.count() * 3;
        var stage3 = try allocator.alloc(i24, stage3_elems);
        defer allocator.free(stage3);
        for (unique_offsets.keys()) |key| {
            stage3[index] = @intCast(key);
            index += 1;
        }
        for (mappings_to_index.keys()) |key| {
            stage3[index] = @intCast(key[0]);
            stage3[index + 1] = @intCast(key[1]);
            stage3[index + 2] = @intCast(key[2]);
            index += 3;
        }

        const stage2_elems = stage2_blocks.count() * 256;
        var stage2 = try allocator.alloc(u8, stage2_elems);
        defer allocator.free(stage2);
        for (stage2_blocks.keys(), 0..) |key, i| {
            @memcpy(stage2[i * 256 ..][0..256], &key);
        }

        // Write out compressed binary data file.
        var args_iter = try std.process.argsWithAllocator(allocator);
        defer args_iter.deinit();
        _ = args_iter.skip();
        const output_path = args_iter.next() orelse @panic("No output file arg!");

        const compressor = std.compress.flate.deflate.compressor;
        var out_file = try std.fs.cwd().createFile(output_path, .{});
        defer out_file.close();
        var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
        const writer = out_comp.writer();

        const endian = builtin.cpu.arch.endian();
        // Table metadata.
        try writer.writeInt(u24, @intCast(codepoint_cutoff), endian);
        try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian);
        // Stage 1
        try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian);
        try writer.writeAll(meaningful_stage1);
        // Stage 2
        try writer.writeInt(u16, @intCast(stage2.len), endian);
        try writer.writeAll(stage2);
        // Stage 3
        try writer.writeInt(u16, @intCast(stage3.len), endian);
        for (stage3) |offset| try writer.writeInt(i24, offset, endian);
        // Changes when case folded
        // Min and max
        try writer.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
        try writer.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
        for (changes_when_casefolded_exceptions.items) |cp| try writer.writeInt(u24, cp, endian);

        try out_comp.flush();
    }
}
248
deps/zg/codegen/gbp.zig
vendored
Normal file
@@ -0,0 +1,248 @@
const std = @import("std");
const builtin = @import("builtin");

const Indic = enum {
    none,

    Consonant,
    Extend,
    Linker,
};

const Gbp = enum {
    none,

    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};

const block_size = 256;
const Block = [block_size]u16;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u16, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var indic_map = std.AutoHashMap(u21, Indic).init(allocator);
    defer indic_map.deinit();

    var gbp_map = std.AutoHashMap(u21, Gbp).init(allocator);
    defer gbp_map.deinit();

    var emoji_set = std.AutoHashMap(u21, void).init(allocator);
    defer emoji_set.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process Indic
    var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer indic_file.close();
    var indic_buf = std.io.bufferedReader(indic_file.reader());
    const indic_reader = indic_buf.reader();

    while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        if (std.mem.indexOf(u8, line, "InCB") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                2 => {
                    // Prop
                    const prop = std.meta.stringToEnum(Indic, field) orelse return error.InvalidProp;
                    for (current_code[0]..current_code[1] + 1) |cp| try indic_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }

    // Process GBP
    var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
    defer gbp_file.close();
    var gbp_buf = std.io.bufferedReader(gbp_file.reader());
    const gbp_reader = gbp_buf.reader();

    while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Prop
                    const prop = std.meta.stringToEnum(Gbp, field) orelse return error.InvalidProp;
                    for (current_code[0]..current_code[1] + 1) |cp| try gbp_map.put(@intCast(cp), prop);
                },
                else => {},
            }
        }
    }

    // Process Emoji
    var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
    defer emoji_file.close();
    var emoji_buf = std.io.bufferedReader(emoji_file.reader());
    const emoji_reader = emoji_buf.reader();

    while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        const from = try std.fmt.parseInt(u21, field[0..dots], 16);
                        const to = try std.fmt.parseInt(u21, field[dots + 2 ..], 16);
                        for (from..to + 1) |cp| try emoji_set.put(@intCast(cp), {});
                    } else {
                        const cp = try std.fmt.parseInt(u21, field, 16);
                        try emoji_set.put(@intCast(cp), {});
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u16).init(allocator);
    defer stage2.deinit();

    var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
    defer stage3.deinit();
    var stage3_len: u16 = 0;

    var block: Block = [_]u16{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const gbp_prop: u8 = @intFromEnum(gbp_map.get(cp) orelse .none);
        const indic_prop: u8 = @intFromEnum(indic_map.get(cp) orelse .none);
        const emoji_prop: u1 = @intFromBool(emoji_set.contains(cp));
        // Pack the three properties into one byte: grapheme break property in
        // the high nibble, InCB in bits 1-3, Extended_Pictographic in bit 0.
        var props_byte: u8 = gbp_prop << 4;
        props_byte |= indic_prop << 1;
        props_byte |= emoji_prop;

        const stage3_idx = blk: {
            const gop = try stage3.getOrPut(props_byte);
            if (!gop.found_existing) {
                gop.value_ptr.* = stage3_len;
                stage3_len += 1;
            }

            break :blk gop.value_ptr.*;
        };

        block[block_len] = stage3_idx;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u16, i, endian);

    const props_bytes = stage3.keys();
    try writer.writeInt(u16, @intCast(props_bytes.len), endian);
    try writer.writeAll(props_bytes);

    try out_comp.flush();
}
171
deps/zg/codegen/gencat.zig
vendored
Normal file
@@ -0,0 +1,171 @@
const std = @import("std");
const builtin = @import("builtin");

const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};

const block_size = 256;
const Block = [block_size]u5;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u5, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u5).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedGeneralCategory.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // General category
                    const gc = std.meta.stringToEnum(Gc, field) orelse return error.UnknownGenCat;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(gc));
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u5).init(allocator);
    defer stage2.deinit();

    var stage3 = std.ArrayList(u5).init(allocator);
    defer stage3.deinit();

    var block: Block = [_]u5{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const gc = flat_map.get(cp).?;

        const stage3_idx = blk: {
            for (stage3.items, 0..) |gci, j| {
                if (gc == gci) break :blk j;
            }
            try stage3.append(gc);
            break :blk stage3.items.len - 1;
        };

        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
133
deps/zg/codegen/hangul.zig
vendored
Normal file
@@ -0,0 +1,133 @@
const std = @import("std");
const builtin = @import("builtin");

const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};

const block_size = 256;
const Block = [block_size]u3;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process HangulSyllableType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Syllable type
                    const st: Syllable = std.meta.stringToEnum(Syllable, field) orelse .none;
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(st));
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const st = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = st;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
57
deps/zg/codegen/lower.zig
vendored
Normal file
@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),

                2 => if (line[0] == '<') continue :lines,

                13 => {
                    // Simple lowercase mapping
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },

                else => {},
            }
        }
    }

    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}
134
deps/zg/codegen/normp.zig
vendored
Normal file
@@ -0,0 +1,134 @@
const std = @import("std");
const builtin = @import("builtin");

const block_size = 256;
const Block = [block_size]u3;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u3, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u3).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedNormalizationProps.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Norm props
                    for (current_code[0]..current_code[1] + 1) |cp| {
                        const gop = try flat_map.getOrPut(@intCast(cp));
                        if (!gop.found_existing) gop.value_ptr.* = 0;

                        if (std.mem.eql(u8, field, "NFD_QC")) {
                            gop.value_ptr.* |= 1;
                        } else if (std.mem.eql(u8, field, "NFKD_QC")) {
                            gop.value_ptr.* |= 2;
                        } else if (std.mem.eql(u8, field, "Full_Composition_Exclusion")) {
                            gop.value_ptr.* |= 4;
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u3).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u3{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const props = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = props;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
135
deps/zg/codegen/numeric.zig
vendored
Normal file
@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedNumericType.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Numeric type
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "Numeric")) bit = 1;
                    if (mem.eql(u8, field, "Digit")) bit = 2;
                    if (mem.eql(u8, field, "Decimal")) bit = 4;

                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const nt = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = nt;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
135
deps/zg/codegen/props.zig
vendored
Normal file
@@ -0,0 +1,135 @@
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process PropList.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;
        const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Core property
                    var bit: u8 = 0;

                    if (mem.eql(u8, field, "White_Space")) bit = 1;
                    if (mem.eql(u8, field, "Hex_Digit")) bit = 2;
                    if (mem.eql(u8, field, "Diacritic")) bit = 4;

                    if (bit != 0) {
                        for (current_code[0]..current_code[1] + 1) |cp| {
                            const gop = try flat_map.getOrPut(@intCast(cp));
                            if (!gop.found_existing) gop.value_ptr.* = 0;
                            gop.value_ptr.* |= bit;
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const prop = flat_map.get(cp) orelse 0;

        // Process block
        block[block_len] = prop;
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
308
deps/zg/codegen/scripts.zig
vendored
Normal file
@@ -0,0 +1,308 @@
const std = @import("std");
const builtin = @import("builtin");

const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};

const block_size = 256;
const Block = [block_size]u8;

const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        pub fn hash(_: @This(), k: Block) u64 {
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
            return hasher.final();
        }

        pub fn eql(_: @This(), a: Block, b: Block) bool {
            return std.mem.eql(u8, &a, &b);
        }
    },
    std.hash_map.default_max_load_percentage,
);

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process Scripts.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0 or line[0] == '#') continue;

        const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;

        var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s)
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Script
                    const script = std.meta.stringToEnum(Script, field) orelse {
                        std.debug.print("Unknown script: {s}\n", .{field});
                        return error.UnknownScript;
                    };
                    for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(script));
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    var stage3 = std.ArrayList(u8).init(allocator);
    defer stage3.deinit();

    var block: Block = [_]u8{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const script = flat_map.get(cp) orelse 0;

        const stage3_idx = blk: {
            for (stage3.items, 0..) |script_i, j| {
                if (script == script_i) break :blk j;
            }
            try stage3.append(script);
            break :blk stage3.items.len - 1;
        };

        // Process block
        block[block_len] = @intCast(stage3_idx);
        block_len += 1;

        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try writer.writeInt(u8, @intCast(stage3.items.len), endian);
    for (stage3.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
57
deps/zg/codegen/upper.zig
vendored
Normal file
@@ -0,0 +1,57 @@
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        var cp: i24 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cp = try std.fmt.parseInt(i24, field, 16),

                2 => if (line[0] == '<') continue :lines,

                12 => {
                    // Simple uppercase mapping
                    if (field.len == 0) continue :lines;
                    try writer.writeInt(i24, cp, endian);
                    const mapping = try std.fmt.parseInt(i24, field, 16);
                    try writer.writeInt(i24, mapping - cp, endian);
                },

                else => {},
            }
        }
    }

    try writer.writeInt(u24, 0, endian);
    try out_comp.flush();
}
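
The stream `upper.zig` emits is just a run of (code point, delta) pairs as native-endian `i24` values, closed by a zero code point; storing `mapping - cp` instead of the mapping itself keeps most values near zero, which deflates well. A minimal sketch of the read side, assuming the raw-deflate stream has already been decompressed into `reader`, and `map` is some hypothetical put-style container (neither name is part of zg's API):

```zig
// Sketch only: decode the zero-terminated (cp, delta) pairs produced above.
fn readUpperMappings(reader: anytype, map: anytype) !void {
    const endian = @import("builtin").cpu.arch.endian();
    while (true) {
        const cp = try reader.readInt(i24, endian);
        if (cp == 0) break; // sentinel written after the last pair
        const delta = try reader.readInt(i24, endian);
        // The uppercase code point is recovered as cp + delta.
        const key: u21 = @intCast(cp);
        const upper: u21 = @intCast(cp + delta);
        try map.put(key, upper);
    }
}
```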
77508
deps/zg/data/lang_mix.txt
vendored
Normal file
File diff suppressed because it is too large
1627
deps/zg/data/unicode/CaseFolding.txt
vendored
Normal file
File diff suppressed because it is too large
12832
deps/zg/data/unicode/DerivedCoreProperties.txt
vendored
Normal file
File diff suppressed because it is too large
858
deps/zg/data/unicode/HangulSyllableType.txt
vendored
Normal file
@@ -0,0 +1,858 @@
# HangulSyllableType-15.1.0.txt
# Date: 2023-01-05, 20:34:42 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/

# ================================================

# Property: Hangul_Syllable_Type

# All code points not explicitly listed for Hangul_Syllable_Type
# have the value Not_Applicable (NA).

# @missing: 0000..10FFFF; Not_Applicable

# ================================================

# Hangul_Syllable_Type=Leading_Jamo

1100..115F ; L # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH

# Total code points: 125

# ================================================

# Hangul_Syllable_Type=Vowel_Jamo

1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE
D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E

# Total code points: 95

# ================================================

# Hangul_Syllable_Type=Trailing_Jamo

11A8..11FF ; T # Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
D7CB..D7FB ; T # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH

# Total code points: 137

# ================================================

# Hangul_Syllable_Type=LV_Syllable

AC00 ; LV # Lo HANGUL SYLLABLE GA
AC1C ; LV # Lo HANGUL SYLLABLE GAE
AC38 ; LV # Lo HANGUL SYLLABLE GYA
AC54 ; LV # Lo HANGUL SYLLABLE GYAE
AC70 ; LV # Lo HANGUL SYLLABLE GEO
AC8C ; LV # Lo HANGUL SYLLABLE GE
ACA8 ; LV # Lo HANGUL SYLLABLE GYEO
ACC4 ; LV # Lo HANGUL SYLLABLE GYE
ACE0 ; LV # Lo HANGUL SYLLABLE GO
ACFC ; LV # Lo HANGUL SYLLABLE GWA
AD18 ; LV # Lo HANGUL SYLLABLE GWAE
AD34 ; LV # Lo HANGUL SYLLABLE GOE
AD50 ; LV # Lo HANGUL SYLLABLE GYO
AD6C ; LV # Lo HANGUL SYLLABLE GU
AD88 ; LV # Lo HANGUL SYLLABLE GWEO
ADA4 ; LV # Lo HANGUL SYLLABLE GWE
ADC0 ; LV # Lo HANGUL SYLLABLE GWI
ADDC ; LV # Lo HANGUL SYLLABLE GYU
ADF8 ; LV # Lo HANGUL SYLLABLE GEU
AE14 ; LV # Lo HANGUL SYLLABLE GYI
AE30 ; LV # Lo HANGUL SYLLABLE GI
AE4C ; LV # Lo HANGUL SYLLABLE GGA
AE68 ; LV # Lo HANGUL SYLLABLE GGAE
AE84 ; LV # Lo HANGUL SYLLABLE GGYA
AEA0 ; LV # Lo HANGUL SYLLABLE GGYAE
AEBC ; LV # Lo HANGUL SYLLABLE GGEO
AED8 ; LV # Lo HANGUL SYLLABLE GGE
AEF4 ; LV # Lo HANGUL SYLLABLE GGYEO
AF10 ; LV # Lo HANGUL SYLLABLE GGYE
AF2C ; LV # Lo HANGUL SYLLABLE GGO
AF48 ; LV # Lo HANGUL SYLLABLE GGWA
AF64 ; LV # Lo HANGUL SYLLABLE GGWAE
AF80 ; LV # Lo HANGUL SYLLABLE GGOE
AF9C ; LV # Lo HANGUL SYLLABLE GGYO
AFB8 ; LV # Lo HANGUL SYLLABLE GGU
AFD4 ; LV # Lo HANGUL SYLLABLE GGWEO
AFF0 ; LV # Lo HANGUL SYLLABLE GGWE
B00C ; LV # Lo HANGUL SYLLABLE GGWI
B028 ; LV # Lo HANGUL SYLLABLE GGYU
B044 ; LV # Lo HANGUL SYLLABLE GGEU
B060 ; LV # Lo HANGUL SYLLABLE GGYI
B07C ; LV # Lo HANGUL SYLLABLE GGI
B098 ; LV # Lo HANGUL SYLLABLE NA
B0B4 ; LV # Lo HANGUL SYLLABLE NAE
B0D0 ; LV # Lo HANGUL SYLLABLE NYA
B0EC ; LV # Lo HANGUL SYLLABLE NYAE
B108 ; LV # Lo HANGUL SYLLABLE NEO
B124 ; LV # Lo HANGUL SYLLABLE NE
B140 ; LV # Lo HANGUL SYLLABLE NYEO
B15C ; LV # Lo HANGUL SYLLABLE NYE
B178 ; LV # Lo HANGUL SYLLABLE NO
B194 ; LV # Lo HANGUL SYLLABLE NWA
B1B0 ; LV # Lo HANGUL SYLLABLE NWAE
B1CC ; LV # Lo HANGUL SYLLABLE NOE
B1E8 ; LV # Lo HANGUL SYLLABLE NYO
B204 ; LV # Lo HANGUL SYLLABLE NU
B220 ; LV # Lo HANGUL SYLLABLE NWEO
B23C ; LV # Lo HANGUL SYLLABLE NWE
B258 ; LV # Lo HANGUL SYLLABLE NWI
B274 ; LV # Lo HANGUL SYLLABLE NYU
B290 ; LV # Lo HANGUL SYLLABLE NEU
B2AC ; LV # Lo HANGUL SYLLABLE NYI
B2C8 ; LV # Lo HANGUL SYLLABLE NI
B2E4 ; LV # Lo HANGUL SYLLABLE DA
B300 ; LV # Lo HANGUL SYLLABLE DAE
B31C ; LV # Lo HANGUL SYLLABLE DYA
B338 ; LV # Lo HANGUL SYLLABLE DYAE
B354 ; LV # Lo HANGUL SYLLABLE DEO
B370 ; LV # Lo HANGUL SYLLABLE DE
B38C ; LV # Lo HANGUL SYLLABLE DYEO
B3A8 ; LV # Lo HANGUL SYLLABLE DYE
B3C4 ; LV # Lo HANGUL SYLLABLE DO
B3E0 ; LV # Lo HANGUL SYLLABLE DWA
B3FC ; LV # Lo HANGUL SYLLABLE DWAE
B418 ; LV # Lo HANGUL SYLLABLE DOE
B434 ; LV # Lo HANGUL SYLLABLE DYO
B450 ; LV # Lo HANGUL SYLLABLE DU
B46C ; LV # Lo HANGUL SYLLABLE DWEO
B488 ; LV # Lo HANGUL SYLLABLE DWE
B4A4 ; LV # Lo HANGUL SYLLABLE DWI
B4C0 ; LV # Lo HANGUL SYLLABLE DYU
B4DC ; LV # Lo HANGUL SYLLABLE DEU
B4F8 ; LV # Lo HANGUL SYLLABLE DYI
B514 ; LV # Lo HANGUL SYLLABLE DI
B530 ; LV # Lo HANGUL SYLLABLE DDA
B54C ; LV # Lo HANGUL SYLLABLE DDAE
B568 ; LV # Lo HANGUL SYLLABLE DDYA
B584 ; LV # Lo HANGUL SYLLABLE DDYAE
B5A0 ; LV # Lo HANGUL SYLLABLE DDEO
B5BC ; LV # Lo HANGUL SYLLABLE DDE
B5D8 ; LV # Lo HANGUL SYLLABLE DDYEO
B5F4 ; LV # Lo HANGUL SYLLABLE DDYE
B610 ; LV # Lo HANGUL SYLLABLE DDO
B62C ; LV # Lo HANGUL SYLLABLE DDWA
B648 ; LV # Lo HANGUL SYLLABLE DDWAE
B664 ; LV # Lo HANGUL SYLLABLE DDOE
B680 ; LV # Lo HANGUL SYLLABLE DDYO
B69C ; LV # Lo HANGUL SYLLABLE DDU
B6B8 ; LV # Lo HANGUL SYLLABLE DDWEO
B6D4 ; LV # Lo HANGUL SYLLABLE DDWE
B6F0 ; LV # Lo HANGUL SYLLABLE DDWI
B70C ; LV # Lo HANGUL SYLLABLE DDYU
B728 ; LV # Lo HANGUL SYLLABLE DDEU
B744 ; LV # Lo HANGUL SYLLABLE DDYI
B760 ; LV # Lo HANGUL SYLLABLE DDI
B77C ; LV # Lo HANGUL SYLLABLE RA
B798 ; LV # Lo HANGUL SYLLABLE RAE
B7B4 ; LV # Lo HANGUL SYLLABLE RYA
B7D0 ; LV # Lo HANGUL SYLLABLE RYAE
B7EC ; LV # Lo HANGUL SYLLABLE REO
B808 ; LV # Lo HANGUL SYLLABLE RE
B824 ; LV # Lo HANGUL SYLLABLE RYEO
B840 ; LV # Lo HANGUL SYLLABLE RYE
B85C ; LV # Lo HANGUL SYLLABLE RO
B878 ; LV # Lo HANGUL SYLLABLE RWA
B894 ; LV # Lo HANGUL SYLLABLE RWAE
B8B0 ; LV # Lo HANGUL SYLLABLE ROE
B8CC ; LV # Lo HANGUL SYLLABLE RYO
B8E8 ; LV # Lo HANGUL SYLLABLE RU
B904 ; LV # Lo HANGUL SYLLABLE RWEO
B920 ; LV # Lo HANGUL SYLLABLE RWE
B93C ; LV # Lo HANGUL SYLLABLE RWI
B958 ; LV # Lo HANGUL SYLLABLE RYU
B974 ; LV # Lo HANGUL SYLLABLE REU
B990 ; LV # Lo HANGUL SYLLABLE RYI
B9AC ; LV # Lo HANGUL SYLLABLE RI
B9C8 ; LV # Lo HANGUL SYLLABLE MA
B9E4 ; LV # Lo HANGUL SYLLABLE MAE
BA00 ; LV # Lo HANGUL SYLLABLE MYA
BA1C ; LV # Lo HANGUL SYLLABLE MYAE
BA38 ; LV # Lo HANGUL SYLLABLE MEO
BA54 ; LV # Lo HANGUL SYLLABLE ME
BA70 ; LV # Lo HANGUL SYLLABLE MYEO
BA8C ; LV # Lo HANGUL SYLLABLE MYE
BAA8 ; LV # Lo HANGUL SYLLABLE MO
BAC4 ; LV # Lo HANGUL SYLLABLE MWA
BAE0 ; LV # Lo HANGUL SYLLABLE MWAE
BAFC ; LV # Lo HANGUL SYLLABLE MOE
BB18 ; LV # Lo HANGUL SYLLABLE MYO
BB34 ; LV # Lo HANGUL SYLLABLE MU
BB50 ; LV # Lo HANGUL SYLLABLE MWEO
BB6C ; LV # Lo HANGUL SYLLABLE MWE
BB88 ; LV # Lo HANGUL SYLLABLE MWI
BBA4 ; LV # Lo HANGUL SYLLABLE MYU
BBC0 ; LV # Lo HANGUL SYLLABLE MEU
BBDC ; LV # Lo HANGUL SYLLABLE MYI
BBF8 ; LV # Lo HANGUL SYLLABLE MI
BC14 ; LV # Lo HANGUL SYLLABLE BA
BC30 ; LV # Lo HANGUL SYLLABLE BAE
BC4C ; LV # Lo HANGUL SYLLABLE BYA
BC68 ; LV # Lo HANGUL SYLLABLE BYAE
BC84 ; LV # Lo HANGUL SYLLABLE BEO
BCA0 ; LV # Lo HANGUL SYLLABLE BE
BCBC ; LV # Lo HANGUL SYLLABLE BYEO
BCD8 ; LV # Lo HANGUL SYLLABLE BYE
BCF4 ; LV # Lo HANGUL SYLLABLE BO
BD10 ; LV # Lo HANGUL SYLLABLE BWA
BD2C ; LV # Lo HANGUL SYLLABLE BWAE
BD48 ; LV # Lo HANGUL SYLLABLE BOE
BD64 ; LV # Lo HANGUL SYLLABLE BYO
BD80 ; LV # Lo HANGUL SYLLABLE BU
BD9C ; LV # Lo HANGUL SYLLABLE BWEO
BDB8 ; LV # Lo HANGUL SYLLABLE BWE
BDD4 ; LV # Lo HANGUL SYLLABLE BWI
BDF0 ; LV # Lo HANGUL SYLLABLE BYU
BE0C ; LV # Lo HANGUL SYLLABLE BEU
BE28 ; LV # Lo HANGUL SYLLABLE BYI
BE44 ; LV # Lo HANGUL SYLLABLE BI
BE60 ; LV # Lo HANGUL SYLLABLE BBA
BE7C ; LV # Lo HANGUL SYLLABLE BBAE
BE98 ; LV # Lo HANGUL SYLLABLE BBYA
BEB4 ; LV # Lo HANGUL SYLLABLE BBYAE
BED0 ; LV # Lo HANGUL SYLLABLE BBEO
BEEC ; LV # Lo HANGUL SYLLABLE BBE
BF08 ; LV # Lo HANGUL SYLLABLE BBYEO
BF24 ; LV # Lo HANGUL SYLLABLE BBYE
BF40 ; LV # Lo HANGUL SYLLABLE BBO
BF5C ; LV # Lo HANGUL SYLLABLE BBWA
BF78 ; LV # Lo HANGUL SYLLABLE BBWAE
BF94 ; LV # Lo HANGUL SYLLABLE BBOE
BFB0 ; LV # Lo HANGUL SYLLABLE BBYO
BFCC ; LV # Lo HANGUL SYLLABLE BBU
BFE8 ; LV # Lo HANGUL SYLLABLE BBWEO
C004 ; LV # Lo HANGUL SYLLABLE BBWE
C020 ; LV # Lo HANGUL SYLLABLE BBWI
C03C ; LV # Lo HANGUL SYLLABLE BBYU
C058 ; LV # Lo HANGUL SYLLABLE BBEU
C074 ; LV # Lo HANGUL SYLLABLE BBYI
C090 ; LV # Lo HANGUL SYLLABLE BBI
C0AC ; LV # Lo HANGUL SYLLABLE SA
C0C8 ; LV # Lo HANGUL SYLLABLE SAE
C0E4 ; LV # Lo HANGUL SYLLABLE SYA
C100 ; LV # Lo HANGUL SYLLABLE SYAE
C11C ; LV # Lo HANGUL SYLLABLE SEO
C138 ; LV # Lo HANGUL SYLLABLE SE
C154 ; LV # Lo HANGUL SYLLABLE SYEO
C170 ; LV # Lo HANGUL SYLLABLE SYE
C18C ; LV # Lo HANGUL SYLLABLE SO
C1A8 ; LV # Lo HANGUL SYLLABLE SWA
C1C4 ; LV # Lo HANGUL SYLLABLE SWAE
C1E0 ; LV # Lo HANGUL SYLLABLE SOE
C1FC ; LV # Lo HANGUL SYLLABLE SYO
C218 ; LV # Lo HANGUL SYLLABLE SU
C234 ; LV # Lo HANGUL SYLLABLE SWEO
C250 ; LV # Lo HANGUL SYLLABLE SWE
C26C ; LV # Lo HANGUL SYLLABLE SWI
C288 ; LV # Lo HANGUL SYLLABLE SYU
C2A4 ; LV # Lo HANGUL SYLLABLE SEU
C2C0 ; LV # Lo HANGUL SYLLABLE SYI
C2DC ; LV # Lo HANGUL SYLLABLE SI
C2F8 ; LV # Lo HANGUL SYLLABLE SSA
C314 ; LV # Lo HANGUL SYLLABLE SSAE
C330 ; LV # Lo HANGUL SYLLABLE SSYA
C34C ; LV # Lo HANGUL SYLLABLE SSYAE
C368 ; LV # Lo HANGUL SYLLABLE SSEO
C384 ; LV # Lo HANGUL SYLLABLE SSE
C3A0 ; LV # Lo HANGUL SYLLABLE SSYEO
C3BC ; LV # Lo HANGUL SYLLABLE SSYE
C3D8 ; LV # Lo HANGUL SYLLABLE SSO
C3F4 ; LV # Lo HANGUL SYLLABLE SSWA
C410 ; LV # Lo HANGUL SYLLABLE SSWAE
C42C ; LV # Lo HANGUL SYLLABLE SSOE
C448 ; LV # Lo HANGUL SYLLABLE SSYO
C464 ; LV # Lo HANGUL SYLLABLE SSU
C480 ; LV # Lo HANGUL SYLLABLE SSWEO
C49C ; LV # Lo HANGUL SYLLABLE SSWE
C4B8 ; LV # Lo HANGUL SYLLABLE SSWI
C4D4 ; LV # Lo HANGUL SYLLABLE SSYU
C4F0 ; LV # Lo HANGUL SYLLABLE SSEU
C50C ; LV # Lo HANGUL SYLLABLE SSYI
C528 ; LV # Lo HANGUL SYLLABLE SSI
C544 ; LV # Lo HANGUL SYLLABLE A
C560 ; LV # Lo HANGUL SYLLABLE AE
C57C ; LV # Lo HANGUL SYLLABLE YA
C598 ; LV # Lo HANGUL SYLLABLE YAE
C5B4 ; LV # Lo HANGUL SYLLABLE EO
C5D0 ; LV # Lo HANGUL SYLLABLE E
C5EC ; LV # Lo HANGUL SYLLABLE YEO
C608 ; LV # Lo HANGUL SYLLABLE YE
C624 ; LV # Lo HANGUL SYLLABLE O
C640 ; LV # Lo HANGUL SYLLABLE WA
C65C ; LV # Lo HANGUL SYLLABLE WAE
C678 ; LV # Lo HANGUL SYLLABLE OE
C694 ; LV # Lo HANGUL SYLLABLE YO
C6B0 ; LV # Lo HANGUL SYLLABLE U
C6CC ; LV # Lo HANGUL SYLLABLE WEO
C6E8 ; LV # Lo HANGUL SYLLABLE WE
C704 ; LV # Lo HANGUL SYLLABLE WI
C720 ; LV # Lo HANGUL SYLLABLE YU
C73C ; LV # Lo HANGUL SYLLABLE EU
C758 ; LV # Lo HANGUL SYLLABLE YI
C774 ; LV # Lo HANGUL SYLLABLE I
C790 ; LV # Lo HANGUL SYLLABLE JA
C7AC ; LV # Lo HANGUL SYLLABLE JAE
C7C8 ; LV # Lo HANGUL SYLLABLE JYA
C7E4 ; LV # Lo HANGUL SYLLABLE JYAE
C800 ; LV # Lo HANGUL SYLLABLE JEO
C81C ; LV # Lo HANGUL SYLLABLE JE
C838 ; LV # Lo HANGUL SYLLABLE JYEO
C854 ; LV # Lo HANGUL SYLLABLE JYE
C870 ; LV # Lo HANGUL SYLLABLE JO
C88C ; LV # Lo HANGUL SYLLABLE JWA
C8A8 ; LV # Lo HANGUL SYLLABLE JWAE
C8C4 ; LV # Lo HANGUL SYLLABLE JOE
C8E0 ; LV # Lo HANGUL SYLLABLE JYO
C8FC ; LV # Lo HANGUL SYLLABLE JU
C918 ; LV # Lo HANGUL SYLLABLE JWEO
C934 ; LV # Lo HANGUL SYLLABLE JWE
C950 ; LV # Lo HANGUL SYLLABLE JWI
C96C ; LV # Lo HANGUL SYLLABLE JYU
C988 ; LV # Lo HANGUL SYLLABLE JEU
C9A4 ; LV # Lo HANGUL SYLLABLE JYI
C9C0 ; LV # Lo HANGUL SYLLABLE JI
C9DC ; LV # Lo HANGUL SYLLABLE JJA
C9F8 ; LV # Lo HANGUL SYLLABLE JJAE
CA14 ; LV # Lo HANGUL SYLLABLE JJYA
CA30 ; LV # Lo HANGUL SYLLABLE JJYAE
CA4C ; LV # Lo HANGUL SYLLABLE JJEO
CA68 ; LV # Lo HANGUL SYLLABLE JJE
CA84 ; LV # Lo HANGUL SYLLABLE JJYEO
CAA0 ; LV # Lo HANGUL SYLLABLE JJYE
CABC ; LV # Lo HANGUL SYLLABLE JJO
CAD8 ; LV # Lo HANGUL SYLLABLE JJWA
CAF4 ; LV # Lo HANGUL SYLLABLE JJWAE
CB10 ; LV # Lo HANGUL SYLLABLE JJOE
CB2C ; LV # Lo HANGUL SYLLABLE JJYO
CB48 ; LV # Lo HANGUL SYLLABLE JJU
CB64 ; LV # Lo HANGUL SYLLABLE JJWEO
CB80 ; LV # Lo HANGUL SYLLABLE JJWE
CB9C ; LV # Lo HANGUL SYLLABLE JJWI
CBB8 ; LV # Lo HANGUL SYLLABLE JJYU
CBD4 ; LV # Lo HANGUL SYLLABLE JJEU
CBF0 ; LV # Lo HANGUL SYLLABLE JJYI
CC0C ; LV # Lo HANGUL SYLLABLE JJI
CC28 ; LV # Lo HANGUL SYLLABLE CA
CC44 ; LV # Lo HANGUL SYLLABLE CAE
CC60 ; LV # Lo HANGUL SYLLABLE CYA
CC7C ; LV # Lo HANGUL SYLLABLE CYAE
CC98 ; LV # Lo HANGUL SYLLABLE CEO
CCB4 ; LV # Lo HANGUL SYLLABLE CE
CCD0 ; LV # Lo HANGUL SYLLABLE CYEO
CCEC ; LV # Lo HANGUL SYLLABLE CYE
CD08 ; LV # Lo HANGUL SYLLABLE CO
CD24 ; LV # Lo HANGUL SYLLABLE CWA
CD40 ; LV # Lo HANGUL SYLLABLE CWAE
CD5C ; LV # Lo HANGUL SYLLABLE COE
CD78 ; LV # Lo HANGUL SYLLABLE CYO
CD94 ; LV # Lo HANGUL SYLLABLE CU
CDB0 ; LV # Lo HANGUL SYLLABLE CWEO
CDCC ; LV # Lo HANGUL SYLLABLE CWE
CDE8 ; LV # Lo HANGUL SYLLABLE CWI
CE04 ; LV # Lo HANGUL SYLLABLE CYU
CE20 ; LV # Lo HANGUL SYLLABLE CEU
CE3C ; LV # Lo HANGUL SYLLABLE CYI
CE58 ; LV # Lo HANGUL SYLLABLE CI
CE74 ; LV # Lo HANGUL SYLLABLE KA
CE90 ; LV # Lo HANGUL SYLLABLE KAE
CEAC ; LV # Lo HANGUL SYLLABLE KYA
CEC8 ; LV # Lo HANGUL SYLLABLE KYAE
CEE4 ; LV # Lo HANGUL SYLLABLE KEO
CF00 ; LV # Lo HANGUL SYLLABLE KE
CF1C ; LV # Lo HANGUL SYLLABLE KYEO
CF38 ; LV # Lo HANGUL SYLLABLE KYE
CF54 ; LV # Lo HANGUL SYLLABLE KO
CF70 ; LV # Lo HANGUL SYLLABLE KWA
CF8C ; LV # Lo HANGUL SYLLABLE KWAE
CFA8 ; LV # Lo HANGUL SYLLABLE KOE
CFC4 ; LV # Lo HANGUL SYLLABLE KYO
CFE0 ; LV # Lo HANGUL SYLLABLE KU
CFFC ; LV # Lo HANGUL SYLLABLE KWEO
D018 ; LV # Lo HANGUL SYLLABLE KWE
D034 ; LV # Lo HANGUL SYLLABLE KWI
D050 ; LV # Lo HANGUL SYLLABLE KYU
D06C ; LV # Lo HANGUL SYLLABLE KEU
D088 ; LV # Lo HANGUL SYLLABLE KYI
D0A4 ; LV # Lo HANGUL SYLLABLE KI
D0C0 ; LV # Lo HANGUL SYLLABLE TA
D0DC ; LV # Lo HANGUL SYLLABLE TAE
D0F8 ; LV # Lo HANGUL SYLLABLE TYA
D114 ; LV # Lo HANGUL SYLLABLE TYAE
D130 ; LV # Lo HANGUL SYLLABLE TEO
D14C ; LV # Lo HANGUL SYLLABLE TE
D168 ; LV # Lo HANGUL SYLLABLE TYEO
D184 ; LV # Lo HANGUL SYLLABLE TYE
D1A0 ; LV # Lo HANGUL SYLLABLE TO
D1BC ; LV # Lo HANGUL SYLLABLE TWA
D1D8 ; LV # Lo HANGUL SYLLABLE TWAE
D1F4 ; LV # Lo HANGUL SYLLABLE TOE
D210 ; LV # Lo HANGUL SYLLABLE TYO
D22C ; LV # Lo HANGUL SYLLABLE TU
D248 ; LV # Lo HANGUL SYLLABLE TWEO
D264 ; LV # Lo HANGUL SYLLABLE TWE
D280 ; LV # Lo HANGUL SYLLABLE TWI
D29C ; LV # Lo HANGUL SYLLABLE TYU
D2B8 ; LV # Lo HANGUL SYLLABLE TEU
D2D4 ; LV # Lo HANGUL SYLLABLE TYI
D2F0 ; LV # Lo HANGUL SYLLABLE TI
D30C ; LV # Lo HANGUL SYLLABLE PA
D328 ; LV # Lo HANGUL SYLLABLE PAE
D344 ; LV # Lo HANGUL SYLLABLE PYA
D360 ; LV # Lo HANGUL SYLLABLE PYAE
D37C ; LV # Lo HANGUL SYLLABLE PEO
D398 ; LV # Lo HANGUL SYLLABLE PE
D3B4 ; LV # Lo HANGUL SYLLABLE PYEO
D3D0 ; LV # Lo HANGUL SYLLABLE PYE
D3EC ; LV # Lo HANGUL SYLLABLE PO
D408 ; LV # Lo HANGUL SYLLABLE PWA
D424 ; LV # Lo HANGUL SYLLABLE PWAE
D440 ; LV # Lo HANGUL SYLLABLE POE
D45C ; LV # Lo HANGUL SYLLABLE PYO
D478 ; LV # Lo HANGUL SYLLABLE PU
D494 ; LV # Lo HANGUL SYLLABLE PWEO
D4B0 ; LV # Lo HANGUL SYLLABLE PWE
D4CC ; LV # Lo HANGUL SYLLABLE PWI
D4E8 ; LV # Lo HANGUL SYLLABLE PYU
D504 ; LV # Lo HANGUL SYLLABLE PEU
D520 ; LV # Lo HANGUL SYLLABLE PYI
D53C ; LV # Lo HANGUL SYLLABLE PI
D558 ; LV # Lo HANGUL SYLLABLE HA
D574 ; LV # Lo HANGUL SYLLABLE HAE
D590 ; LV # Lo HANGUL SYLLABLE HYA
D5AC ; LV # Lo HANGUL SYLLABLE HYAE
D5C8 ; LV # Lo HANGUL SYLLABLE HEO
D5E4 ; LV # Lo HANGUL SYLLABLE HE
D600 ; LV # Lo HANGUL SYLLABLE HYEO
D61C ; LV # Lo HANGUL SYLLABLE HYE
D638 ; LV # Lo HANGUL SYLLABLE HO
D654 ; LV # Lo HANGUL SYLLABLE HWA
D670 ; LV # Lo HANGUL SYLLABLE HWAE
D68C ; LV # Lo HANGUL SYLLABLE HOE
D6A8 ; LV # Lo HANGUL SYLLABLE HYO
D6C4 ; LV # Lo HANGUL SYLLABLE HU
D6E0 ; LV # Lo HANGUL SYLLABLE HWEO
D6FC ; LV # Lo HANGUL SYLLABLE HWE
D718 ; LV # Lo HANGUL SYLLABLE HWI
D734 ; LV # Lo HANGUL SYLLABLE HYU
D750 ; LV # Lo HANGUL SYLLABLE HEU
D76C ; LV # Lo HANGUL SYLLABLE HYI
D788 ; LV # Lo HANGUL SYLLABLE HI

# Total code points: 399

# ================================================

# Hangul_Syllable_Type=LVT_Syllable

AC01..AC1B ; LVT # Lo [27] HANGUL SYLLABLE GAG..HANGUL SYLLABLE GAH
AC1D..AC37 ; LVT # Lo [27] HANGUL SYLLABLE GAEG..HANGUL SYLLABLE GAEH
AC39..AC53 ; LVT # Lo [27] HANGUL SYLLABLE GYAG..HANGUL SYLLABLE GYAH
AC55..AC6F ; LVT # Lo [27] HANGUL SYLLABLE GYAEG..HANGUL SYLLABLE GYAEH
AC71..AC8B ; LVT # Lo [27] HANGUL SYLLABLE GEOG..HANGUL SYLLABLE GEOH
AC8D..ACA7 ; LVT # Lo [27] HANGUL SYLLABLE GEG..HANGUL SYLLABLE GEH
ACA9..ACC3 ; LVT # Lo [27] HANGUL SYLLABLE GYEOG..HANGUL SYLLABLE GYEOH
ACC5..ACDF ; LVT # Lo [27] HANGUL SYLLABLE GYEG..HANGUL SYLLABLE GYEH
ACE1..ACFB ; LVT # Lo [27] HANGUL SYLLABLE GOG..HANGUL SYLLABLE GOH
ACFD..AD17 ; LVT # Lo [27] HANGUL SYLLABLE GWAG..HANGUL SYLLABLE GWAH
AD19..AD33 ; LVT # Lo [27] HANGUL SYLLABLE GWAEG..HANGUL SYLLABLE GWAEH
AD35..AD4F ; LVT # Lo [27] HANGUL SYLLABLE GOEG..HANGUL SYLLABLE GOEH
AD51..AD6B ; LVT # Lo [27] HANGUL SYLLABLE GYOG..HANGUL SYLLABLE GYOH
AD6D..AD87 ; LVT # Lo [27] HANGUL SYLLABLE GUG..HANGUL SYLLABLE GUH
AD89..ADA3 ; LVT # Lo [27] HANGUL SYLLABLE GWEOG..HANGUL SYLLABLE GWEOH
ADA5..ADBF ; LVT # Lo [27] HANGUL SYLLABLE GWEG..HANGUL SYLLABLE GWEH
ADC1..ADDB ; LVT # Lo [27] HANGUL SYLLABLE GWIG..HANGUL SYLLABLE GWIH
ADDD..ADF7 ; LVT # Lo [27] HANGUL SYLLABLE GYUG..HANGUL SYLLABLE GYUH
ADF9..AE13 ; LVT # Lo [27] HANGUL SYLLABLE GEUG..HANGUL SYLLABLE GEUH
AE15..AE2F ; LVT # Lo [27] HANGUL SYLLABLE GYIG..HANGUL SYLLABLE GYIH
AE31..AE4B ; LVT # Lo [27] HANGUL SYLLABLE GIG..HANGUL SYLLABLE GIH
AE4D..AE67 ; LVT # Lo [27] HANGUL SYLLABLE GGAG..HANGUL SYLLABLE GGAH
AE69..AE83 ; LVT # Lo [27] HANGUL SYLLABLE GGAEG..HANGUL SYLLABLE GGAEH
AE85..AE9F ; LVT # Lo [27] HANGUL SYLLABLE GGYAG..HANGUL SYLLABLE GGYAH
AEA1..AEBB ; LVT # Lo [27] HANGUL SYLLABLE GGYAEG..HANGUL SYLLABLE GGYAEH
AEBD..AED7 ; LVT # Lo [27] HANGUL SYLLABLE GGEOG..HANGUL SYLLABLE GGEOH
AED9..AEF3 ; LVT # Lo [27] HANGUL SYLLABLE GGEG..HANGUL SYLLABLE GGEH
AEF5..AF0F ; LVT # Lo [27] HANGUL SYLLABLE GGYEOG..HANGUL SYLLABLE GGYEOH
AF11..AF2B ; LVT # Lo [27] HANGUL SYLLABLE GGYEG..HANGUL SYLLABLE GGYEH
AF2D..AF47 ; LVT # Lo [27] HANGUL SYLLABLE GGOG..HANGUL SYLLABLE GGOH
AF49..AF63 ; LVT # Lo [27] HANGUL SYLLABLE GGWAG..HANGUL SYLLABLE GGWAH
AF65..AF7F ; LVT # Lo [27] HANGUL SYLLABLE GGWAEG..HANGUL SYLLABLE GGWAEH
AF81..AF9B ; LVT # Lo [27] HANGUL SYLLABLE GGOEG..HANGUL SYLLABLE GGOEH
AF9D..AFB7 ; LVT # Lo [27] HANGUL SYLLABLE GGYOG..HANGUL SYLLABLE GGYOH
AFB9..AFD3 ; LVT # Lo [27] HANGUL SYLLABLE GGUG..HANGUL SYLLABLE GGUH
AFD5..AFEF ; LVT # Lo [27] HANGUL SYLLABLE GGWEOG..HANGUL SYLLABLE GGWEOH
AFF1..B00B ; LVT # Lo [27] HANGUL SYLLABLE GGWEG..HANGUL SYLLABLE GGWEH
B00D..B027 ; LVT # Lo [27] HANGUL SYLLABLE GGWIG..HANGUL SYLLABLE GGWIH
B029..B043 ; LVT # Lo [27] HANGUL SYLLABLE GGYUG..HANGUL SYLLABLE GGYUH
B045..B05F ; LVT # Lo [27] HANGUL SYLLABLE GGEUG..HANGUL SYLLABLE GGEUH
B061..B07B ; LVT # Lo [27] HANGUL SYLLABLE GGYIG..HANGUL SYLLABLE GGYIH
B07D..B097 ; LVT # Lo [27] HANGUL SYLLABLE GGIG..HANGUL SYLLABLE GGIH
B099..B0B3 ; LVT # Lo [27] HANGUL SYLLABLE NAG..HANGUL SYLLABLE NAH
B0B5..B0CF ; LVT # Lo [27] HANGUL SYLLABLE NAEG..HANGUL SYLLABLE NAEH
B0D1..B0EB ; LVT # Lo [27] HANGUL SYLLABLE NYAG..HANGUL SYLLABLE NYAH
B0ED..B107 ; LVT # Lo [27] HANGUL SYLLABLE NYAEG..HANGUL SYLLABLE NYAEH
B109..B123 ; LVT # Lo [27] HANGUL SYLLABLE NEOG..HANGUL SYLLABLE NEOH
B125..B13F ; LVT # Lo [27] HANGUL SYLLABLE NEG..HANGUL SYLLABLE NEH
B141..B15B ; LVT # Lo [27] HANGUL SYLLABLE NYEOG..HANGUL SYLLABLE NYEOH
B15D..B177 ; LVT # Lo [27] HANGUL SYLLABLE NYEG..HANGUL SYLLABLE NYEH
B179..B193 ; LVT # Lo [27] HANGUL SYLLABLE NOG..HANGUL SYLLABLE NOH
B195..B1AF ; LVT # Lo [27] HANGUL SYLLABLE NWAG..HANGUL SYLLABLE NWAH
B1B1..B1CB ; LVT # Lo [27] HANGUL SYLLABLE NWAEG..HANGUL SYLLABLE NWAEH
B1CD..B1E7 ; LVT # Lo [27] HANGUL SYLLABLE NOEG..HANGUL SYLLABLE NOEH
B1E9..B203 ; LVT # Lo [27] HANGUL SYLLABLE NYOG..HANGUL SYLLABLE NYOH
B205..B21F ; LVT # Lo [27] HANGUL SYLLABLE NUG..HANGUL SYLLABLE NUH
B221..B23B ; LVT # Lo [27] HANGUL SYLLABLE NWEOG..HANGUL SYLLABLE NWEOH
B23D..B257 ; LVT # Lo [27] HANGUL SYLLABLE NWEG..HANGUL SYLLABLE NWEH
B259..B273 ; LVT # Lo [27] HANGUL SYLLABLE NWIG..HANGUL SYLLABLE NWIH
B275..B28F ; LVT # Lo [27] HANGUL SYLLABLE NYUG..HANGUL SYLLABLE NYUH
B291..B2AB ; LVT # Lo [27] HANGUL SYLLABLE NEUG..HANGUL SYLLABLE NEUH
B2AD..B2C7 ; LVT # Lo [27] HANGUL SYLLABLE NYIG..HANGUL SYLLABLE NYIH
B2C9..B2E3 ; LVT # Lo [27] HANGUL SYLLABLE NIG..HANGUL SYLLABLE NIH
B2E5..B2FF ; LVT # Lo [27] HANGUL SYLLABLE DAG..HANGUL SYLLABLE DAH
B301..B31B ; LVT # Lo [27] HANGUL SYLLABLE DAEG..HANGUL SYLLABLE DAEH
B31D..B337 ; LVT # Lo [27] HANGUL SYLLABLE DYAG..HANGUL SYLLABLE DYAH
B339..B353 ; LVT # Lo [27] HANGUL SYLLABLE DYAEG..HANGUL SYLLABLE DYAEH
B355..B36F ; LVT # Lo [27] HANGUL SYLLABLE DEOG..HANGUL SYLLABLE DEOH
B371..B38B ; LVT # Lo [27] HANGUL SYLLABLE DEG..HANGUL SYLLABLE DEH
B38D..B3A7 ; LVT # Lo [27] HANGUL SYLLABLE DYEOG..HANGUL SYLLABLE DYEOH
B3A9..B3C3 ; LVT # Lo [27] HANGUL SYLLABLE DYEG..HANGUL SYLLABLE DYEH
B3C5..B3DF ; LVT # Lo [27] HANGUL SYLLABLE DOG..HANGUL SYLLABLE DOH
B3E1..B3FB ; LVT # Lo [27] HANGUL SYLLABLE DWAG..HANGUL SYLLABLE DWAH
B3FD..B417 ; LVT # Lo [27] HANGUL SYLLABLE DWAEG..HANGUL SYLLABLE DWAEH
B419..B433 ; LVT # Lo [27] HANGUL SYLLABLE DOEG..HANGUL SYLLABLE DOEH
B435..B44F ; LVT # Lo [27] HANGUL SYLLABLE DYOG..HANGUL SYLLABLE DYOH
B451..B46B ; LVT # Lo [27] HANGUL SYLLABLE DUG..HANGUL SYLLABLE DUH
B46D..B487 ; LVT # Lo [27] HANGUL SYLLABLE DWEOG..HANGUL SYLLABLE DWEOH
B489..B4A3 ; LVT # Lo [27] HANGUL SYLLABLE DWEG..HANGUL SYLLABLE DWEH
B4A5..B4BF ; LVT # Lo [27] HANGUL SYLLABLE DWIG..HANGUL SYLLABLE DWIH
B4C1..B4DB ; LVT # Lo [27] HANGUL SYLLABLE DYUG..HANGUL SYLLABLE DYUH
B4DD..B4F7 ; LVT # Lo [27] HANGUL SYLLABLE DEUG..HANGUL SYLLABLE DEUH
B4F9..B513 ; LVT # Lo [27] HANGUL SYLLABLE DYIG..HANGUL SYLLABLE DYIH
B515..B52F ; LVT # Lo [27] HANGUL SYLLABLE DIG..HANGUL SYLLABLE DIH
B531..B54B ; LVT # Lo [27] HANGUL SYLLABLE DDAG..HANGUL SYLLABLE DDAH
B54D..B567 ; LVT # Lo [27] HANGUL SYLLABLE DDAEG..HANGUL SYLLABLE DDAEH
B569..B583 ; LVT # Lo [27] HANGUL SYLLABLE DDYAG..HANGUL SYLLABLE DDYAH
B585..B59F ; LVT # Lo [27] HANGUL SYLLABLE DDYAEG..HANGUL SYLLABLE DDYAEH
B5A1..B5BB ; LVT # Lo [27] HANGUL SYLLABLE DDEOG..HANGUL SYLLABLE DDEOH
B5BD..B5D7 ; LVT # Lo [27] HANGUL SYLLABLE DDEG..HANGUL SYLLABLE DDEH
B5D9..B5F3 ; LVT # Lo [27] HANGUL SYLLABLE DDYEOG..HANGUL SYLLABLE DDYEOH
B5F5..B60F ; LVT # Lo [27] HANGUL SYLLABLE DDYEG..HANGUL SYLLABLE DDYEH
B611..B62B ; LVT # Lo [27] HANGUL SYLLABLE DDOG..HANGUL SYLLABLE DDOH
B62D..B647 ; LVT # Lo [27] HANGUL SYLLABLE DDWAG..HANGUL SYLLABLE DDWAH
B649..B663 ; LVT # Lo [27] HANGUL SYLLABLE DDWAEG..HANGUL SYLLABLE DDWAEH
B665..B67F ; LVT # Lo [27] HANGUL SYLLABLE DDOEG..HANGUL SYLLABLE DDOEH
B681..B69B ; LVT # Lo [27] HANGUL SYLLABLE DDYOG..HANGUL SYLLABLE DDYOH
B69D..B6B7 ; LVT # Lo [27] HANGUL SYLLABLE DDUG..HANGUL SYLLABLE DDUH
B6B9..B6D3 ; LVT # Lo [27] HANGUL SYLLABLE DDWEOG..HANGUL SYLLABLE DDWEOH
B6D5..B6EF ; LVT # Lo [27] HANGUL SYLLABLE DDWEG..HANGUL SYLLABLE DDWEH
B6F1..B70B ; LVT # Lo [27] HANGUL SYLLABLE DDWIG..HANGUL SYLLABLE DDWIH
B70D..B727 ; LVT # Lo [27] HANGUL SYLLABLE DDYUG..HANGUL SYLLABLE DDYUH
B729..B743 ; LVT # Lo [27] HANGUL SYLLABLE DDEUG..HANGUL SYLLABLE DDEUH
B745..B75F ; LVT # Lo [27] HANGUL SYLLABLE DDYIG..HANGUL SYLLABLE DDYIH
B761..B77B ; LVT # Lo [27] HANGUL SYLLABLE DDIG..HANGUL SYLLABLE DDIH
B77D..B797 ; LVT # Lo [27] HANGUL SYLLABLE RAG..HANGUL SYLLABLE RAH
B799..B7B3 ; LVT # Lo [27] HANGUL SYLLABLE RAEG..HANGUL SYLLABLE RAEH
B7B5..B7CF ; LVT # Lo [27] HANGUL SYLLABLE RYAG..HANGUL SYLLABLE RYAH
B7D1..B7EB ; LVT # Lo [27] HANGUL SYLLABLE RYAEG..HANGUL SYLLABLE RYAEH
B7ED..B807 ; LVT # Lo [27] HANGUL SYLLABLE REOG..HANGUL SYLLABLE REOH
B809..B823 ; LVT # Lo [27] HANGUL SYLLABLE REG..HANGUL SYLLABLE REH
B825..B83F ; LVT # Lo [27] HANGUL SYLLABLE RYEOG..HANGUL SYLLABLE RYEOH
B841..B85B ; LVT # Lo [27] HANGUL SYLLABLE RYEG..HANGUL SYLLABLE RYEH
B85D..B877 ; LVT # Lo [27] HANGUL SYLLABLE ROG..HANGUL SYLLABLE ROH
B879..B893 ; LVT # Lo [27] HANGUL SYLLABLE RWAG..HANGUL SYLLABLE RWAH
B895..B8AF ; LVT # Lo [27] HANGUL SYLLABLE RWAEG..HANGUL SYLLABLE RWAEH
B8B1..B8CB ; LVT # Lo [27] HANGUL SYLLABLE ROEG..HANGUL SYLLABLE ROEH
B8CD..B8E7 ; LVT # Lo [27] HANGUL SYLLABLE RYOG..HANGUL SYLLABLE RYOH
B8E9..B903 ; LVT # Lo [27] HANGUL SYLLABLE RUG..HANGUL SYLLABLE RUH
B905..B91F ; LVT # Lo [27] HANGUL SYLLABLE RWEOG..HANGUL SYLLABLE RWEOH
B921..B93B ; LVT # Lo [27] HANGUL SYLLABLE RWEG..HANGUL SYLLABLE RWEH
B93D..B957 ; LVT # Lo [27] HANGUL SYLLABLE RWIG..HANGUL SYLLABLE RWIH
B959..B973 ; LVT # Lo [27] HANGUL SYLLABLE RYUG..HANGUL SYLLABLE RYUH
B975..B98F ; LVT # Lo [27] HANGUL SYLLABLE REUG..HANGUL SYLLABLE REUH
B991..B9AB ; LVT # Lo [27] HANGUL SYLLABLE RYIG..HANGUL SYLLABLE RYIH
B9AD..B9C7 ; LVT # Lo [27] HANGUL SYLLABLE RIG..HANGUL SYLLABLE RIH
B9C9..B9E3 ; LVT # Lo [27] HANGUL SYLLABLE MAG..HANGUL SYLLABLE MAH
B9E5..B9FF ; LVT # Lo [27] HANGUL SYLLABLE MAEG..HANGUL SYLLABLE MAEH
BA01..BA1B ; LVT # Lo [27] HANGUL SYLLABLE MYAG..HANGUL SYLLABLE MYAH
BA1D..BA37 ; LVT # Lo [27] HANGUL SYLLABLE MYAEG..HANGUL SYLLABLE MYAEH
BA39..BA53 ; LVT # Lo [27] HANGUL SYLLABLE MEOG..HANGUL SYLLABLE MEOH
BA55..BA6F ; LVT # Lo [27] HANGUL SYLLABLE MEG..HANGUL SYLLABLE MEH
BA71..BA8B ; LVT # Lo [27] HANGUL SYLLABLE MYEOG..HANGUL SYLLABLE MYEOH
BA8D..BAA7 ; LVT # Lo [27] HANGUL SYLLABLE MYEG..HANGUL SYLLABLE MYEH
BAA9..BAC3 ; LVT # Lo [27] HANGUL SYLLABLE MOG..HANGUL SYLLABLE MOH
BAC5..BADF ; LVT # Lo [27] HANGUL SYLLABLE MWAG..HANGUL SYLLABLE MWAH
BAE1..BAFB ; LVT # Lo [27] HANGUL SYLLABLE MWAEG..HANGUL SYLLABLE MWAEH
BAFD..BB17 ; LVT # Lo [27] HANGUL SYLLABLE MOEG..HANGUL SYLLABLE MOEH
BB19..BB33 ; LVT # Lo [27] HANGUL SYLLABLE MYOG..HANGUL SYLLABLE MYOH
BB35..BB4F ; LVT # Lo [27] HANGUL SYLLABLE MUG..HANGUL SYLLABLE MUH
BB51..BB6B ; LVT # Lo [27] HANGUL SYLLABLE MWEOG..HANGUL SYLLABLE MWEOH
BB6D..BB87 ; LVT # Lo [27] HANGUL SYLLABLE MWEG..HANGUL SYLLABLE MWEH
BB89..BBA3 ; LVT # Lo [27] HANGUL SYLLABLE MWIG..HANGUL SYLLABLE MWIH
BBA5..BBBF ; LVT # Lo [27] HANGUL SYLLABLE MYUG..HANGUL SYLLABLE MYUH
BBC1..BBDB ; LVT # Lo [27] HANGUL SYLLABLE MEUG..HANGUL SYLLABLE MEUH
BBDD..BBF7 ; LVT # Lo [27] HANGUL SYLLABLE MYIG..HANGUL SYLLABLE MYIH
BBF9..BC13 ; LVT # Lo [27] HANGUL SYLLABLE MIG..HANGUL SYLLABLE MIH
BC15..BC2F ; LVT # Lo [27] HANGUL SYLLABLE BAG..HANGUL SYLLABLE BAH
BC31..BC4B ; LVT # Lo [27] HANGUL SYLLABLE BAEG..HANGUL SYLLABLE BAEH
BC4D..BC67 ; LVT # Lo [27] HANGUL SYLLABLE BYAG..HANGUL SYLLABLE BYAH
BC69..BC83 ; LVT # Lo [27] HANGUL SYLLABLE BYAEG..HANGUL SYLLABLE BYAEH
BC85..BC9F ; LVT # Lo [27] HANGUL SYLLABLE BEOG..HANGUL SYLLABLE BEOH
BCA1..BCBB ; LVT # Lo [27] HANGUL SYLLABLE BEG..HANGUL SYLLABLE BEH
BCBD..BCD7 ; LVT # Lo [27] HANGUL SYLLABLE BYEOG..HANGUL SYLLABLE BYEOH
BCD9..BCF3 ; LVT # Lo [27] HANGUL SYLLABLE BYEG..HANGUL SYLLABLE BYEH
BCF5..BD0F ; LVT # Lo [27] HANGUL SYLLABLE BOG..HANGUL SYLLABLE BOH
BD11..BD2B ; LVT # Lo [27] HANGUL SYLLABLE BWAG..HANGUL SYLLABLE BWAH
BD2D..BD47 ; LVT # Lo [27] HANGUL SYLLABLE BWAEG..HANGUL SYLLABLE BWAEH
BD49..BD63 ; LVT # Lo [27] HANGUL SYLLABLE BOEG..HANGUL SYLLABLE BOEH
BD65..BD7F ; LVT # Lo [27] HANGUL SYLLABLE BYOG..HANGUL SYLLABLE BYOH
BD81..BD9B ; LVT # Lo [27] HANGUL SYLLABLE BUG..HANGUL SYLLABLE BUH
BD9D..BDB7 ; LVT # Lo [27] HANGUL SYLLABLE BWEOG..HANGUL SYLLABLE BWEOH
BDB9..BDD3 ; LVT # Lo [27] HANGUL SYLLABLE BWEG..HANGUL SYLLABLE BWEH
BDD5..BDEF ; LVT # Lo [27] HANGUL SYLLABLE BWIG..HANGUL SYLLABLE BWIH
BDF1..BE0B ; LVT # Lo [27] HANGUL SYLLABLE BYUG..HANGUL SYLLABLE BYUH
BE0D..BE27 ; LVT # Lo [27] HANGUL SYLLABLE BEUG..HANGUL SYLLABLE BEUH
BE29..BE43 ; LVT # Lo [27] HANGUL SYLLABLE BYIG..HANGUL SYLLABLE BYIH
BE45..BE5F ; LVT # Lo [27] HANGUL SYLLABLE BIG..HANGUL SYLLABLE BIH
BE61..BE7B ; LVT # Lo [27] HANGUL SYLLABLE BBAG..HANGUL SYLLABLE BBAH
BE7D..BE97 ; LVT # Lo [27] HANGUL SYLLABLE BBAEG..HANGUL SYLLABLE BBAEH
BE99..BEB3 ; LVT # Lo [27] HANGUL SYLLABLE BBYAG..HANGUL SYLLABLE BBYAH
BEB5..BECF ; LVT # Lo [27] HANGUL SYLLABLE BBYAEG..HANGUL SYLLABLE BBYAEH
BED1..BEEB ; LVT # Lo [27] HANGUL SYLLABLE BBEOG..HANGUL SYLLABLE BBEOH
BEED..BF07 ; LVT # Lo [27] HANGUL SYLLABLE BBEG..HANGUL SYLLABLE BBEH
BF09..BF23 ; LVT # Lo [27] HANGUL SYLLABLE BBYEOG..HANGUL SYLLABLE BBYEOH
BF25..BF3F ; LVT # Lo [27] HANGUL SYLLABLE BBYEG..HANGUL SYLLABLE BBYEH
BF41..BF5B ; LVT # Lo [27] HANGUL SYLLABLE BBOG..HANGUL SYLLABLE BBOH
BF5D..BF77 ; LVT # Lo [27] HANGUL SYLLABLE BBWAG..HANGUL SYLLABLE BBWAH
BF79..BF93 ; LVT # Lo [27] HANGUL SYLLABLE BBWAEG..HANGUL SYLLABLE BBWAEH
BF95..BFAF ; LVT # Lo [27] HANGUL SYLLABLE BBOEG..HANGUL SYLLABLE BBOEH
BFB1..BFCB ; LVT # Lo [27] HANGUL SYLLABLE BBYOG..HANGUL SYLLABLE BBYOH
BFCD..BFE7 ; LVT # Lo [27] HANGUL SYLLABLE BBUG..HANGUL SYLLABLE BBUH
BFE9..C003 ; LVT # Lo [27] HANGUL SYLLABLE BBWEOG..HANGUL SYLLABLE BBWEOH
C005..C01F ; LVT # Lo [27] HANGUL SYLLABLE BBWEG..HANGUL SYLLABLE BBWEH
C021..C03B ; LVT # Lo [27] HANGUL SYLLABLE BBWIG..HANGUL SYLLABLE BBWIH
C03D..C057 ; LVT # Lo [27] HANGUL SYLLABLE BBYUG..HANGUL SYLLABLE BBYUH
C059..C073 ; LVT # Lo [27] HANGUL SYLLABLE BBEUG..HANGUL SYLLABLE BBEUH
C075..C08F ; LVT # Lo [27] HANGUL SYLLABLE BBYIG..HANGUL SYLLABLE BBYIH
C091..C0AB ; LVT # Lo [27] HANGUL SYLLABLE BBIG..HANGUL SYLLABLE BBIH
C0AD..C0C7 ; LVT # Lo [27] HANGUL SYLLABLE SAG..HANGUL SYLLABLE SAH
C0C9..C0E3 ; LVT # Lo [27] HANGUL SYLLABLE SAEG..HANGUL SYLLABLE SAEH
C0E5..C0FF ; LVT # Lo [27] HANGUL SYLLABLE SYAG..HANGUL SYLLABLE SYAH
C101..C11B ; LVT # Lo [27] HANGUL SYLLABLE SYAEG..HANGUL SYLLABLE SYAEH
C11D..C137 ; LVT # Lo [27] HANGUL SYLLABLE SEOG..HANGUL SYLLABLE SEOH
C139..C153 ; LVT # Lo [27] HANGUL SYLLABLE SEG..HANGUL SYLLABLE SEH
C155..C16F ; LVT # Lo [27] HANGUL SYLLABLE SYEOG..HANGUL SYLLABLE SYEOH
C171..C18B ; LVT # Lo [27] HANGUL SYLLABLE SYEG..HANGUL SYLLABLE SYEH
C18D..C1A7 ; LVT # Lo [27] HANGUL SYLLABLE SOG..HANGUL SYLLABLE SOH
C1A9..C1C3 ; LVT # Lo [27] HANGUL SYLLABLE SWAG..HANGUL SYLLABLE SWAH
C1C5..C1DF ; LVT # Lo [27] HANGUL SYLLABLE SWAEG..HANGUL SYLLABLE SWAEH
C1E1..C1FB ; LVT # Lo [27] HANGUL SYLLABLE SOEG..HANGUL SYLLABLE SOEH
C1FD..C217 ; LVT # Lo [27] HANGUL SYLLABLE SYOG..HANGUL SYLLABLE SYOH
C219..C233 ; LVT # Lo [27] HANGUL SYLLABLE SUG..HANGUL SYLLABLE SUH
C235..C24F ; LVT # Lo [27] HANGUL SYLLABLE SWEOG..HANGUL SYLLABLE SWEOH
C251..C26B ; LVT # Lo [27] HANGUL SYLLABLE SWEG..HANGUL SYLLABLE SWEH
C26D..C287 ; LVT # Lo [27] HANGUL SYLLABLE SWIG..HANGUL SYLLABLE SWIH
C289..C2A3 ; LVT # Lo [27] HANGUL SYLLABLE SYUG..HANGUL SYLLABLE SYUH
C2A5..C2BF ; LVT # Lo [27] HANGUL SYLLABLE SEUG..HANGUL SYLLABLE SEUH
C2C1..C2DB ; LVT # Lo [27] HANGUL SYLLABLE SYIG..HANGUL SYLLABLE SYIH
C2DD..C2F7 ; LVT # Lo [27] HANGUL SYLLABLE SIG..HANGUL SYLLABLE SIH
C2F9..C313 ; LVT # Lo [27] HANGUL SYLLABLE SSAG..HANGUL SYLLABLE SSAH
C315..C32F ; LVT # Lo [27] HANGUL SYLLABLE SSAEG..HANGUL SYLLABLE SSAEH
C331..C34B ; LVT # Lo [27] HANGUL SYLLABLE SSYAG..HANGUL SYLLABLE SSYAH
C34D..C367 ; LVT # Lo [27] HANGUL SYLLABLE SSYAEG..HANGUL SYLLABLE SSYAEH
C369..C383 ; LVT # Lo [27] HANGUL SYLLABLE SSEOG..HANGUL SYLLABLE SSEOH
C385..C39F ; LVT # Lo [27] HANGUL SYLLABLE SSEG..HANGUL SYLLABLE SSEH
C3A1..C3BB ; LVT # Lo [27] HANGUL SYLLABLE SSYEOG..HANGUL SYLLABLE SSYEOH
C3BD..C3D7 ; LVT # Lo [27] HANGUL SYLLABLE SSYEG..HANGUL SYLLABLE SSYEH
C3D9..C3F3 ; LVT # Lo [27] HANGUL SYLLABLE SSOG..HANGUL SYLLABLE SSOH
C3F5..C40F ; LVT # Lo [27] HANGUL SYLLABLE SSWAG..HANGUL SYLLABLE SSWAH
C411..C42B ; LVT # Lo [27] HANGUL SYLLABLE SSWAEG..HANGUL SYLLABLE SSWAEH
C42D..C447 ; LVT # Lo [27] HANGUL SYLLABLE SSOEG..HANGUL SYLLABLE SSOEH
C449..C463 ; LVT # Lo [27] HANGUL SYLLABLE SSYOG..HANGUL SYLLABLE SSYOH
C465..C47F ; LVT # Lo [27] HANGUL SYLLABLE SSUG..HANGUL SYLLABLE SSUH
C481..C49B ; LVT # Lo [27] HANGUL SYLLABLE SSWEOG..HANGUL SYLLABLE SSWEOH
C49D..C4B7 ; LVT # Lo [27] HANGUL SYLLABLE SSWEG..HANGUL SYLLABLE SSWEH
C4B9..C4D3 ; LVT # Lo [27] HANGUL SYLLABLE SSWIG..HANGUL SYLLABLE SSWIH
C4D5..C4EF ; LVT # Lo [27] HANGUL SYLLABLE SSYUG..HANGUL SYLLABLE SSYUH
C4F1..C50B ; LVT # Lo [27] HANGUL SYLLABLE SSEUG..HANGUL SYLLABLE SSEUH
C50D..C527 ; LVT # Lo [27] HANGUL SYLLABLE SSYIG..HANGUL SYLLABLE SSYIH
C529..C543 ; LVT # Lo [27] HANGUL SYLLABLE SSIG..HANGUL SYLLABLE SSIH
C545..C55F ; LVT # Lo [27] HANGUL SYLLABLE AG..HANGUL SYLLABLE AH
C561..C57B ; LVT # Lo [27] HANGUL SYLLABLE AEG..HANGUL SYLLABLE AEH
C57D..C597 ; LVT # Lo [27] HANGUL SYLLABLE YAG..HANGUL SYLLABLE YAH
C599..C5B3 ; LVT # Lo [27] HANGUL SYLLABLE YAEG..HANGUL SYLLABLE YAEH
C5B5..C5CF ; LVT # Lo [27] HANGUL SYLLABLE EOG..HANGUL SYLLABLE EOH
C5D1..C5EB ; LVT # Lo [27] HANGUL SYLLABLE EG..HANGUL SYLLABLE EH
C5ED..C607 ; LVT # Lo [27] HANGUL SYLLABLE YEOG..HANGUL SYLLABLE YEOH
C609..C623 ; LVT # Lo [27] HANGUL SYLLABLE YEG..HANGUL SYLLABLE YEH
C625..C63F ; LVT # Lo [27] HANGUL SYLLABLE OG..HANGUL SYLLABLE OH
C641..C65B ; LVT # Lo [27] HANGUL SYLLABLE WAG..HANGUL SYLLABLE WAH
C65D..C677 ; LVT # Lo [27] HANGUL SYLLABLE WAEG..HANGUL SYLLABLE WAEH
C679..C693 ; LVT # Lo [27] HANGUL SYLLABLE OEG..HANGUL SYLLABLE OEH
C695..C6AF ; LVT # Lo [27] HANGUL SYLLABLE YOG..HANGUL SYLLABLE YOH
C6B1..C6CB ; LVT # Lo [27] HANGUL SYLLABLE UG..HANGUL SYLLABLE UH
C6CD..C6E7 ; LVT # Lo [27] HANGUL SYLLABLE WEOG..HANGUL SYLLABLE WEOH
C6E9..C703 ; LVT # Lo [27] HANGUL SYLLABLE WEG..HANGUL SYLLABLE WEH
C705..C71F ; LVT # Lo [27] HANGUL SYLLABLE WIG..HANGUL SYLLABLE WIH
C721..C73B ; LVT # Lo [27] HANGUL SYLLABLE YUG..HANGUL SYLLABLE YUH
C73D..C757 ; LVT # Lo [27] HANGUL SYLLABLE EUG..HANGUL SYLLABLE EUH
C759..C773 ; LVT # Lo [27] HANGUL SYLLABLE YIG..HANGUL SYLLABLE YIH
C775..C78F ; LVT # Lo [27] HANGUL SYLLABLE IG..HANGUL SYLLABLE IH
C791..C7AB ; LVT # Lo [27] HANGUL SYLLABLE JAG..HANGUL SYLLABLE JAH
C7AD..C7C7 ; LVT # Lo [27] HANGUL SYLLABLE JAEG..HANGUL SYLLABLE JAEH
C7C9..C7E3 ; LVT # Lo [27] HANGUL SYLLABLE JYAG..HANGUL SYLLABLE JYAH
C7E5..C7FF ; LVT # Lo [27] HANGUL SYLLABLE JYAEG..HANGUL SYLLABLE JYAEH
C801..C81B ; LVT # Lo [27] HANGUL SYLLABLE JEOG..HANGUL SYLLABLE JEOH
C81D..C837 ; LVT # Lo [27] HANGUL SYLLABLE JEG..HANGUL SYLLABLE JEH
C839..C853 ; LVT # Lo [27] HANGUL SYLLABLE JYEOG..HANGUL SYLLABLE JYEOH
C855..C86F ; LVT # Lo [27] HANGUL SYLLABLE JYEG..HANGUL SYLLABLE JYEH
C871..C88B ; LVT # Lo [27] HANGUL SYLLABLE JOG..HANGUL SYLLABLE JOH
C88D..C8A7 ; LVT # Lo [27] HANGUL SYLLABLE JWAG..HANGUL SYLLABLE JWAH
C8A9..C8C3 ; LVT # Lo [27] HANGUL SYLLABLE JWAEG..HANGUL SYLLABLE JWAEH
C8C5..C8DF ; LVT # Lo [27] HANGUL SYLLABLE JOEG..HANGUL SYLLABLE JOEH
C8E1..C8FB ; LVT # Lo [27] HANGUL SYLLABLE JYOG..HANGUL SYLLABLE JYOH
C8FD..C917 ; LVT # Lo [27] HANGUL SYLLABLE JUG..HANGUL SYLLABLE JUH
C919..C933 ; LVT # Lo [27] HANGUL SYLLABLE JWEOG..HANGUL SYLLABLE JWEOH
C935..C94F ; LVT # Lo [27] HANGUL SYLLABLE JWEG..HANGUL SYLLABLE JWEH
C951..C96B ; LVT # Lo [27] HANGUL SYLLABLE JWIG..HANGUL SYLLABLE JWIH
C96D..C987 ; LVT # Lo [27] HANGUL SYLLABLE JYUG..HANGUL SYLLABLE JYUH
C989..C9A3 ; LVT # Lo [27] HANGUL SYLLABLE JEUG..HANGUL SYLLABLE JEUH
C9A5..C9BF ; LVT # Lo [27] HANGUL SYLLABLE JYIG..HANGUL SYLLABLE JYIH
C9C1..C9DB ; LVT # Lo [27] HANGUL SYLLABLE JIG..HANGUL SYLLABLE JIH
C9DD..C9F7 ; LVT # Lo [27] HANGUL SYLLABLE JJAG..HANGUL SYLLABLE JJAH
C9F9..CA13 ; LVT # Lo [27] HANGUL SYLLABLE JJAEG..HANGUL SYLLABLE JJAEH
CA15..CA2F ; LVT # Lo [27] HANGUL SYLLABLE JJYAG..HANGUL SYLLABLE JJYAH
CA31..CA4B ; LVT # Lo [27] HANGUL SYLLABLE JJYAEG..HANGUL SYLLABLE JJYAEH
CA4D..CA67 ; LVT # Lo [27] HANGUL SYLLABLE JJEOG..HANGUL SYLLABLE JJEOH
CA69..CA83 ; LVT # Lo [27] HANGUL SYLLABLE JJEG..HANGUL SYLLABLE JJEH
CA85..CA9F ; LVT # Lo [27] HANGUL SYLLABLE JJYEOG..HANGUL SYLLABLE JJYEOH
CAA1..CABB ; LVT # Lo [27] HANGUL SYLLABLE JJYEG..HANGUL SYLLABLE JJYEH
CABD..CAD7 ; LVT # Lo [27] HANGUL SYLLABLE JJOG..HANGUL SYLLABLE JJOH
CAD9..CAF3 ; LVT # Lo [27] HANGUL SYLLABLE JJWAG..HANGUL SYLLABLE JJWAH
CAF5..CB0F ; LVT # Lo [27] HANGUL SYLLABLE JJWAEG..HANGUL SYLLABLE JJWAEH
CB11..CB2B ; LVT # Lo [27] HANGUL SYLLABLE JJOEG..HANGUL SYLLABLE JJOEH
CB2D..CB47 ; LVT # Lo [27] HANGUL SYLLABLE JJYOG..HANGUL SYLLABLE JJYOH
CB49..CB63 ; LVT # Lo [27] HANGUL SYLLABLE JJUG..HANGUL SYLLABLE JJUH
CB65..CB7F ; LVT # Lo [27] HANGUL SYLLABLE JJWEOG..HANGUL SYLLABLE JJWEOH
CB81..CB9B ; LVT # Lo [27] HANGUL SYLLABLE JJWEG..HANGUL SYLLABLE JJWEH
CB9D..CBB7 ; LVT # Lo [27] HANGUL SYLLABLE JJWIG..HANGUL SYLLABLE JJWIH
CBB9..CBD3 ; LVT # Lo [27] HANGUL SYLLABLE JJYUG..HANGUL SYLLABLE JJYUH
CBD5..CBEF ; LVT # Lo [27] HANGUL SYLLABLE JJEUG..HANGUL SYLLABLE JJEUH
CBF1..CC0B ; LVT # Lo [27] HANGUL SYLLABLE JJYIG..HANGUL SYLLABLE JJYIH
CC0D..CC27 ; LVT # Lo [27] HANGUL SYLLABLE JJIG..HANGUL SYLLABLE JJIH
CC29..CC43 ; LVT # Lo [27] HANGUL SYLLABLE CAG..HANGUL SYLLABLE CAH
CC45..CC5F ; LVT # Lo [27] HANGUL SYLLABLE CAEG..HANGUL SYLLABLE CAEH
CC61..CC7B ; LVT # Lo [27] HANGUL SYLLABLE CYAG..HANGUL SYLLABLE CYAH
CC7D..CC97 ; LVT # Lo [27] HANGUL SYLLABLE CYAEG..HANGUL SYLLABLE CYAEH
CC99..CCB3 ; LVT # Lo [27] HANGUL SYLLABLE CEOG..HANGUL SYLLABLE CEOH
CCB5..CCCF ; LVT # Lo [27] HANGUL SYLLABLE CEG..HANGUL SYLLABLE CEH
CCD1..CCEB ; LVT # Lo [27] HANGUL SYLLABLE CYEOG..HANGUL SYLLABLE CYEOH
CCED..CD07 ; LVT # Lo [27] HANGUL SYLLABLE CYEG..HANGUL SYLLABLE CYEH
CD09..CD23 ; LVT # Lo [27] HANGUL SYLLABLE COG..HANGUL SYLLABLE COH
CD25..CD3F ; LVT # Lo [27] HANGUL SYLLABLE CWAG..HANGUL SYLLABLE CWAH
CD41..CD5B ; LVT # Lo [27] HANGUL SYLLABLE CWAEG..HANGUL SYLLABLE CWAEH
CD5D..CD77 ; LVT # Lo [27] HANGUL SYLLABLE COEG..HANGUL SYLLABLE COEH
CD79..CD93 ; LVT # Lo [27] HANGUL SYLLABLE CYOG..HANGUL SYLLABLE CYOH
CD95..CDAF ; LVT # Lo [27] HANGUL SYLLABLE CUG..HANGUL SYLLABLE CUH
CDB1..CDCB ; LVT # Lo [27] HANGUL SYLLABLE CWEOG..HANGUL SYLLABLE CWEOH
CDCD..CDE7 ; LVT # Lo [27] HANGUL SYLLABLE CWEG..HANGUL SYLLABLE CWEH
CDE9..CE03 ; LVT # Lo [27] HANGUL SYLLABLE CWIG..HANGUL SYLLABLE CWIH
CE05..CE1F ; LVT # Lo [27] HANGUL SYLLABLE CYUG..HANGUL SYLLABLE CYUH
CE21..CE3B ; LVT # Lo [27] HANGUL SYLLABLE CEUG..HANGUL SYLLABLE CEUH
CE3D..CE57 ; LVT # Lo [27] HANGUL SYLLABLE CYIG..HANGUL SYLLABLE CYIH
CE59..CE73 ; LVT # Lo [27] HANGUL SYLLABLE CIG..HANGUL SYLLABLE CIH
CE75..CE8F ; LVT # Lo [27] HANGUL SYLLABLE KAG..HANGUL SYLLABLE KAH
CE91..CEAB ; LVT # Lo [27] HANGUL SYLLABLE KAEG..HANGUL SYLLABLE KAEH
CEAD..CEC7 ; LVT # Lo [27] HANGUL SYLLABLE KYAG..HANGUL SYLLABLE KYAH
CEC9..CEE3 ; LVT # Lo [27] HANGUL SYLLABLE KYAEG..HANGUL SYLLABLE KYAEH
CEE5..CEFF ; LVT # Lo [27] HANGUL SYLLABLE KEOG..HANGUL SYLLABLE KEOH
CF01..CF1B ; LVT # Lo [27] HANGUL SYLLABLE KEG..HANGUL SYLLABLE KEH
CF1D..CF37 ; LVT # Lo [27] HANGUL SYLLABLE KYEOG..HANGUL SYLLABLE KYEOH
CF39..CF53 ; LVT # Lo [27] HANGUL SYLLABLE KYEG..HANGUL SYLLABLE KYEH
CF55..CF6F ; LVT # Lo [27] HANGUL SYLLABLE KOG..HANGUL SYLLABLE KOH
CF71..CF8B ; LVT # Lo [27] HANGUL SYLLABLE KWAG..HANGUL SYLLABLE KWAH
CF8D..CFA7 ; LVT # Lo [27] HANGUL SYLLABLE KWAEG..HANGUL SYLLABLE KWAEH
CFA9..CFC3 ; LVT # Lo [27] HANGUL SYLLABLE KOEG..HANGUL SYLLABLE KOEH
CFC5..CFDF ; LVT # Lo [27] HANGUL SYLLABLE KYOG..HANGUL SYLLABLE KYOH
CFE1..CFFB ; LVT # Lo [27] HANGUL SYLLABLE KUG..HANGUL SYLLABLE KUH
CFFD..D017 ; LVT # Lo [27] HANGUL SYLLABLE KWEOG..HANGUL SYLLABLE KWEOH
D019..D033 ; LVT # Lo [27] HANGUL SYLLABLE KWEG..HANGUL SYLLABLE KWEH
D035..D04F ; LVT # Lo [27] HANGUL SYLLABLE KWIG..HANGUL SYLLABLE KWIH
D051..D06B ; LVT # Lo [27] HANGUL SYLLABLE KYUG..HANGUL SYLLABLE KYUH
D06D..D087 ; LVT # Lo [27] HANGUL SYLLABLE KEUG..HANGUL SYLLABLE KEUH
D089..D0A3 ; LVT # Lo [27] HANGUL SYLLABLE KYIG..HANGUL SYLLABLE KYIH
D0A5..D0BF ; LVT # Lo [27] HANGUL SYLLABLE KIG..HANGUL SYLLABLE KIH
D0C1..D0DB ; LVT # Lo [27] HANGUL SYLLABLE TAG..HANGUL SYLLABLE TAH
D0DD..D0F7 ; LVT # Lo [27] HANGUL SYLLABLE TAEG..HANGUL SYLLABLE TAEH
D0F9..D113 ; LVT # Lo [27] HANGUL SYLLABLE TYAG..HANGUL SYLLABLE TYAH
D115..D12F ; LVT # Lo [27] HANGUL SYLLABLE TYAEG..HANGUL SYLLABLE TYAEH
D131..D14B ; LVT # Lo [27] HANGUL SYLLABLE TEOG..HANGUL SYLLABLE TEOH
D14D..D167 ; LVT # Lo [27] HANGUL SYLLABLE TEG..HANGUL SYLLABLE TEH
D169..D183 ; LVT # Lo [27] HANGUL SYLLABLE TYEOG..HANGUL SYLLABLE TYEOH
D185..D19F ; LVT # Lo [27] HANGUL SYLLABLE TYEG..HANGUL SYLLABLE TYEH
D1A1..D1BB ; LVT # Lo [27] HANGUL SYLLABLE TOG..HANGUL SYLLABLE TOH
D1BD..D1D7 ; LVT # Lo [27] HANGUL SYLLABLE TWAG..HANGUL SYLLABLE TWAH
D1D9..D1F3 ; LVT # Lo [27] HANGUL SYLLABLE TWAEG..HANGUL SYLLABLE TWAEH
D1F5..D20F ; LVT # Lo [27] HANGUL SYLLABLE TOEG..HANGUL SYLLABLE TOEH
D211..D22B ; LVT # Lo [27] HANGUL SYLLABLE TYOG..HANGUL SYLLABLE TYOH
D22D..D247 ; LVT # Lo [27] HANGUL SYLLABLE TUG..HANGUL SYLLABLE TUH
D249..D263 ; LVT # Lo [27] HANGUL SYLLABLE TWEOG..HANGUL SYLLABLE TWEOH
D265..D27F ; LVT # Lo [27] HANGUL SYLLABLE TWEG..HANGUL SYLLABLE TWEH
D281..D29B ; LVT # Lo [27] HANGUL SYLLABLE TWIG..HANGUL SYLLABLE TWIH
D29D..D2B7 ; LVT # Lo [27] HANGUL SYLLABLE TYUG..HANGUL SYLLABLE TYUH
D2B9..D2D3 ; LVT # Lo [27] HANGUL SYLLABLE TEUG..HANGUL SYLLABLE TEUH
D2D5..D2EF ; LVT # Lo [27] HANGUL SYLLABLE TYIG..HANGUL SYLLABLE TYIH
D2F1..D30B ; LVT # Lo [27] HANGUL SYLLABLE TIG..HANGUL SYLLABLE TIH
D30D..D327 ; LVT # Lo [27] HANGUL SYLLABLE PAG..HANGUL SYLLABLE PAH
D329..D343 ; LVT # Lo [27] HANGUL SYLLABLE PAEG..HANGUL SYLLABLE PAEH
D345..D35F ; LVT # Lo [27] HANGUL SYLLABLE PYAG..HANGUL SYLLABLE PYAH
D361..D37B ; LVT # Lo [27] HANGUL SYLLABLE PYAEG..HANGUL SYLLABLE PYAEH
D37D..D397 ; LVT # Lo [27] HANGUL SYLLABLE PEOG..HANGUL SYLLABLE PEOH
D399..D3B3 ; LVT # Lo [27] HANGUL SYLLABLE PEG..HANGUL SYLLABLE PEH
D3B5..D3CF ; LVT # Lo [27] HANGUL SYLLABLE PYEOG..HANGUL SYLLABLE PYEOH
D3D1..D3EB ; LVT # Lo [27] HANGUL SYLLABLE PYEG..HANGUL SYLLABLE PYEH
D3ED..D407 ; LVT # Lo [27] HANGUL SYLLABLE POG..HANGUL SYLLABLE POH
D409..D423 ; LVT # Lo [27] HANGUL SYLLABLE PWAG..HANGUL SYLLABLE PWAH
D425..D43F ; LVT # Lo [27] HANGUL SYLLABLE PWAEG..HANGUL SYLLABLE PWAEH
D441..D45B ; LVT # Lo [27] HANGUL SYLLABLE POEG..HANGUL SYLLABLE POEH
D45D..D477 ; LVT # Lo [27] HANGUL SYLLABLE PYOG..HANGUL SYLLABLE PYOH
D479..D493 ; LVT # Lo [27] HANGUL SYLLABLE PUG..HANGUL SYLLABLE PUH
D495..D4AF ; LVT # Lo [27] HANGUL SYLLABLE PWEOG..HANGUL SYLLABLE PWEOH
D4B1..D4CB ; LVT # Lo [27] HANGUL SYLLABLE PWEG..HANGUL SYLLABLE PWEH
D4CD..D4E7 ; LVT # Lo [27] HANGUL SYLLABLE PWIG..HANGUL SYLLABLE PWIH
D4E9..D503 ; LVT # Lo [27] HANGUL SYLLABLE PYUG..HANGUL SYLLABLE PYUH
D505..D51F ; LVT # Lo [27] HANGUL SYLLABLE PEUG..HANGUL SYLLABLE PEUH
D521..D53B ; LVT # Lo [27] HANGUL SYLLABLE PYIG..HANGUL SYLLABLE PYIH
D53D..D557 ; LVT # Lo [27] HANGUL SYLLABLE PIG..HANGUL SYLLABLE PIH
D559..D573 ; LVT # Lo [27] HANGUL SYLLABLE HAG..HANGUL SYLLABLE HAH
D575..D58F ; LVT # Lo [27] HANGUL SYLLABLE HAEG..HANGUL SYLLABLE HAEH
D591..D5AB ; LVT # Lo [27] HANGUL SYLLABLE HYAG..HANGUL SYLLABLE HYAH
D5AD..D5C7 ; LVT # Lo [27] HANGUL SYLLABLE HYAEG..HANGUL SYLLABLE HYAEH
D5C9..D5E3 ; LVT # Lo [27] HANGUL SYLLABLE HEOG..HANGUL SYLLABLE HEOH
D5E5..D5FF ; LVT # Lo [27] HANGUL SYLLABLE HEG..HANGUL SYLLABLE HEH
D601..D61B ; LVT # Lo [27] HANGUL SYLLABLE HYEOG..HANGUL SYLLABLE HYEOH
D61D..D637 ; LVT # Lo [27] HANGUL SYLLABLE HYEG..HANGUL SYLLABLE HYEH
D639..D653 ; LVT # Lo [27] HANGUL SYLLABLE HOG..HANGUL SYLLABLE HOH
D655..D66F ; LVT # Lo [27] HANGUL SYLLABLE HWAG..HANGUL SYLLABLE HWAH
D671..D68B ; LVT # Lo [27] HANGUL SYLLABLE HWAEG..HANGUL SYLLABLE HWAEH
D68D..D6A7 ; LVT # Lo [27] HANGUL SYLLABLE HOEG..HANGUL SYLLABLE HOEH
D6A9..D6C3 ; LVT # Lo [27] HANGUL SYLLABLE HYOG..HANGUL SYLLABLE HYOH
D6C5..D6DF ; LVT # Lo [27] HANGUL SYLLABLE HUG..HANGUL SYLLABLE HUH
D6E1..D6FB ; LVT # Lo [27] HANGUL SYLLABLE HWEOG..HANGUL SYLLABLE HWEOH
D6FD..D717 ; LVT # Lo [27] HANGUL SYLLABLE HWEG..HANGUL SYLLABLE HWEH
D719..D733 ; LVT # Lo [27] HANGUL SYLLABLE HWIG..HANGUL SYLLABLE HWIH
D735..D74F ; LVT # Lo [27] HANGUL SYLLABLE HYUG..HANGUL SYLLABLE HYUH
D751..D76B ; LVT # Lo [27] HANGUL SYLLABLE HEUG..HANGUL SYLLABLE HEUH
D76D..D787 ; LVT # Lo [27] HANGUL SYLLABLE HYIG..HANGUL SYLLABLE HYIH
D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH

# Total code points: 10773

# EOF
19129
deps/zg/data/unicode/NormalizationTest.txt
vendored
Normal file
File diff suppressed because it is too large
1827
deps/zg/data/unicode/PropList.txt
vendored
Normal file
File diff suppressed because it is too large
3033
deps/zg/data/unicode/Scripts.txt
vendored
Normal file
File diff suppressed because it is too large
34931
deps/zg/data/unicode/UnicodeData.txt
vendored
Normal file
File diff suppressed because it is too large
1475
deps/zg/data/unicode/auxiliary/GraphemeBreakProperty.txt
vendored
Normal file
File diff suppressed because it is too large
1215
deps/zg/data/unicode/auxiliary/GraphemeBreakTest.txt
vendored
Normal file
File diff suppressed because it is too large
1320
deps/zg/data/unicode/emoji/emoji-data.txt
vendored
Normal file
File diff suppressed because it is too large
2788
deps/zg/data/unicode/extracted/DerivedCombiningClass.txt
vendored
Normal file
File diff suppressed because it is too large
2611
deps/zg/data/unicode/extracted/DerivedEastAsianWidth.txt
vendored
Normal file
File diff suppressed because it is too large
4233
deps/zg/data/unicode/extracted/DerivedGeneralCategory.txt
vendored
Normal file
File diff suppressed because it is too large
291
deps/zg/data/unicode/extracted/DerivedNumericType.txt
vendored
Normal file
@@ -0,0 +1,291 @@
# DerivedNumericType-15.1.0.txt
# Date: 2023-01-05, 20:34:41 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/

# ================================================

# Derived Property: Numeric_Type
# The values are based on fields 6-8 of UnicodeData.txt, plus the fields
# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan).
# The derivations for these values are as follows.
# Numeric_Type=Decimal: When there is a value in field 6.
# Numeric_Type=Digit: When there is a value in field 7, but not in field 6.
# Numeric_Type=Numeric: When there are values for kAccountingNumeric, kOtherNumeric, kPrimaryNumeric,
# or there is a value in field 8, but not in field 7.
# Numeric_Type=None: Otherwise

# All code points not explicitly listed for Numeric_Type
# have the value None.

# @missing: 0000..10FFFF; None

# ================================================

00BC..00BE ; Numeric # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
09F4..09F9 ; Numeric # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
0B72..0B77 ; Numeric # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
0BF0..0BF2 ; Numeric # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
0C78..0C7E ; Numeric # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0D58..0D5E ; Numeric # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
0D70..0D78 ; Numeric # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
0F2A..0F33 ; Numeric # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
1372..137C ; Numeric # No [11] ETHIOPIC NUMBER TEN..ETHIOPIC NUMBER TEN THOUSAND
16EE..16F0 ; Numeric # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
17F0..17F9 ; Numeric # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
2150..215F ; Numeric # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2160..2182 ; Numeric # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2185..2188 ; Numeric # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
2189 ; Numeric # No VULGAR FRACTION ZERO THIRDS
2469..2473 ; Numeric # No [11] CIRCLED NUMBER TEN..CIRCLED NUMBER TWENTY
247D..2487 ; Numeric # No [11] PARENTHESIZED NUMBER TEN..PARENTHESIZED NUMBER TWENTY
2491..249B ; Numeric # No [11] NUMBER TEN FULL STOP..NUMBER TWENTY FULL STOP
24EB..24F4 ; Numeric # No [10] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED NUMBER TWENTY
24FE ; Numeric # No DOUBLE CIRCLED NUMBER TEN
277F ; Numeric # No DINGBAT NEGATIVE CIRCLED NUMBER TEN
2789 ; Numeric # No DINGBAT CIRCLED SANS-SERIF NUMBER TEN
2793 ; Numeric # No DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2CFD ; Numeric # No COPTIC FRACTION ONE HALF
3007 ; Numeric # Nl IDEOGRAPHIC NUMBER ZERO
3021..3029 ; Numeric # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Numeric # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3192..3195 ; Numeric # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3220..3229 ; Numeric # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
3248..324F ; Numeric # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
3251..325F ; Numeric # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
3280..3289 ; Numeric # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
32B1..32BF ; Numeric # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
3405 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3405
3483 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3483
382A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-382A
3B4D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-3B4D
4E00 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E00
4E03 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E03
4E07 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E07
4E09 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E09
4E24 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E24
4E5D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E5D
4E8C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E8C
4E94 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E94
4E96 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4E96
4EAC ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EAC
4EBF..4EC0 ; Numeric # Lo [2] CJK UNIFIED IDEOGRAPH-4EBF..CJK UNIFIED IDEOGRAPH-4EC0
4EDF ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EDF
4EE8 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4EE8
4F0D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4F0D
4F70 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4F70
4FE9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-4FE9
5006 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5006
5104 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5104
5146 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5146
5169 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5169
516B ; Numeric # Lo CJK UNIFIED IDEOGRAPH-516B
516D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-516D
5341 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5341
5343..5345 ; Numeric # Lo [3] CJK UNIFIED IDEOGRAPH-5343..CJK UNIFIED IDEOGRAPH-5345
534C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-534C
53C1..53C4 ; Numeric # Lo [4] CJK UNIFIED IDEOGRAPH-53C1..CJK UNIFIED IDEOGRAPH-53C4
56DB ; Numeric # Lo CJK UNIFIED IDEOGRAPH-56DB
58F1 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-58F1
58F9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-58F9
5E7A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5E7A
5EFE..5EFF ; Numeric # Lo [2] CJK UNIFIED IDEOGRAPH-5EFE..CJK UNIFIED IDEOGRAPH-5EFF
5F0C..5F0E ; Numeric # Lo [3] CJK UNIFIED IDEOGRAPH-5F0C..CJK UNIFIED IDEOGRAPH-5F0E
5F10 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-5F10
62D0 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-62D0
62FE ; Numeric # Lo CJK UNIFIED IDEOGRAPH-62FE
634C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-634C
67D2 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-67D2
6D1E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-6D1E
6F06 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-6F06
7396 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-7396
767E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-767E
7695 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-7695
79ED ; Numeric # Lo CJK UNIFIED IDEOGRAPH-79ED
8086 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8086
842C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-842C
8CAE ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8CAE
8CB3 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8CB3
8D30 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-8D30
920E ; Numeric # Lo CJK UNIFIED IDEOGRAPH-920E
94A9 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-94A9
9621 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9621
9646 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9646
964C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-964C
9678 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-9678
96F6 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-96F6
A6E6..A6EF ; Numeric # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A830..A835 ; Numeric # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
F96B ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F96B
F973 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F973
F978 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F978
F9B2 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9B2
F9D1 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9D1
F9D3 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9D3
F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD
10107..10133 ; Numeric # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10140..10174 ; Numeric # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Numeric # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
1018A..1018B ; Numeric # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
102E1..102FB ; Numeric # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
10320..10323 ; Numeric # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
10341 ; Numeric # Nl GOTHIC LETTER NINETY
1034A ; Numeric # Nl GOTHIC LETTER NINE HUNDRED
103D1..103D5 ; Numeric # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
10858..1085F ; Numeric # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
10879..1087F ; Numeric # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
108A7..108AF ; Numeric # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
108FB..108FF ; Numeric # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED
10916..1091B ; Numeric # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
109BC..109BD ; Numeric # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF
109C0..109CF ; Numeric # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY
109D2..109FF ; Numeric # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS
10A44..10A48 ; Numeric # No [5] KHAROSHTHI NUMBER TEN..KHAROSHTHI FRACTION ONE HALF
10A7D..10A7E ; Numeric # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
10A9D..10A9F ; Numeric # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
10AEB..10AEF ; Numeric # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
10B58..10B5F ; Numeric # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
10B78..10B7F ; Numeric # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
10BA9..10BAF ; Numeric # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
10CFA..10CFF ; Numeric # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND
10E69..10E7E ; Numeric # No [22] RUMI NUMBER TEN..RUMI FRACTION TWO THIRDS
10F1D..10F26 ; Numeric # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F51..10F54 ; Numeric # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10FC5..10FCB ; Numeric # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
1105B..11065 ; Numeric # No [11] BRAHMI NUMBER TEN..BRAHMI NUMBER ONE THOUSAND
111E1..111F4 ; Numeric # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
1173A..1173B ; Numeric # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
118EA..118F2 ; Numeric # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
11C5A..11C6C ; Numeric # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
11FC0..11FD4 ; Numeric # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
12400..1246E ; Numeric # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
16B5B..16B61 ; Numeric # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
16E80..16E96 ; Numeric # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
1D2C0..1D2D3 ; Numeric # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
1D2E0..1D2F3 ; Numeric # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
1D360..1D378 ; Numeric # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
1E8C7..1E8CF ; Numeric # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
1EC71..1ECAB ; Numeric # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE
1ECAD..1ECAF ; Numeric # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS
1ECB1..1ECB4 ; Numeric # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK
1ED01..1ED2D ; Numeric # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND
1ED2F..1ED3D ; Numeric # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
1F10B..1F10C ; Numeric # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
20001 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20001
20064 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20064
200E2 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-200E2
20121 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20121
2092A ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2092A
20983 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20983
2098C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2098C
2099C ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2099C
20AEA ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20AEA
20AFD ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20AFD
20B19 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-20B19
22390 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-22390
22998 ; Numeric # Lo CJK UNIFIED IDEOGRAPH-22998
23B1B ; Numeric # Lo CJK UNIFIED IDEOGRAPH-23B1B
2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D
2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890

# Total code points: 1114

# ================================================

00B2..00B3 ; Digit # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B9 ; Digit # No SUPERSCRIPT ONE
1369..1371 ; Digit # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
19DA ; Digit # No NEW TAI LUE THAM DIGIT ONE
2070 ; Digit # No SUPERSCRIPT ZERO
2074..2079 ; Digit # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
2080..2089 ; Digit # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
2460..2468 ; Digit # No [9] CIRCLED DIGIT ONE..CIRCLED DIGIT NINE
2474..247C ; Digit # No [9] PARENTHESIZED DIGIT ONE..PARENTHESIZED DIGIT NINE
2488..2490 ; Digit # No [9] DIGIT ONE FULL STOP..DIGIT NINE FULL STOP
24EA ; Digit # No CIRCLED DIGIT ZERO
24F5..24FD ; Digit # No [9] DOUBLE CIRCLED DIGIT ONE..DOUBLE CIRCLED DIGIT NINE
24FF ; Digit # No NEGATIVE CIRCLED DIGIT ZERO
2776..277E ; Digit # No [9] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED DIGIT NINE
2780..2788 ; Digit # No [9] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT CIRCLED SANS-SERIF DIGIT NINE
278A..2792 ; Digit # No [9] DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
10A40..10A43 ; Digit # No [4] KHAROSHTHI DIGIT ONE..KHAROSHTHI DIGIT FOUR
10E60..10E68 ; Digit # No [9] RUMI DIGIT ONE..RUMI DIGIT NINE
11052..1105A ; Digit # No [9] BRAHMI NUMBER ONE..BRAHMI NUMBER NINE
1F100..1F10A ; Digit # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA

# Total code points: 128

# ================================================

0030..0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE
0660..0669 ; Decimal # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
06F0..06F9 ; Decimal # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 ; Decimal # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F ; Decimal # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF ; Decimal # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F ; Decimal # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF ; Decimal # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F ; Decimal # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF ; Decimal # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F ; Decimal # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF ; Decimal # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F ; Decimal # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0DE6..0DEF ; Decimal # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
0E50..0E59 ; Decimal # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 ; Decimal # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 ; Decimal # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 ; Decimal # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
1090..1099 ; Decimal # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
17E0..17E9 ; Decimal # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Decimal # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Decimal # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Decimal # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
1A80..1A89 ; Decimal # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Decimal # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1B50..1B59 ; Decimal # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1BB0..1BB9 ; Decimal # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1C40..1C49 ; Decimal # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
1C50..1C59 ; Decimal # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
A620..A629 ; Decimal # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A8D0..A8D9 ; Decimal # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A900..A909 ; Decimal # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A9D0..A9D9 ; Decimal # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
A9F0..A9F9 ; Decimal # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
AA50..AA59 ; Decimal # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
112F0..112F9 ; Decimal # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
11450..11459 ; Decimal # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
11F50..11F59 ; Decimal # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
16A60..16A69 ; Decimal # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
16AC0..16AC9 ; Decimal # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
16B50..16B59 ; Decimal # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1E140..1E149 ; Decimal # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Decimal # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E4F0..1E4F9 ; Decimal # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE

# Total code points: 680

# EOF
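The header comments above define how Numeric_Type is derived from UnicodeData.txt and Unihan fields; the body uses the usual UCD layout of `code point or range ; value # general category and name`. As a quick orientation to that layout, here is a minimal Zig sketch for parsing one such line. It is illustrative only: the `parseLine` helper is hypothetical and is not part of the vendored code or of zg's data generator.

```zig
const std = @import("std");

/// Hypothetical helper: parse a UCD data line such as
/// "0030..0039 ; Decimal # Nd [10] DIGIT ZERO..DIGIT NINE".
fn parseLine(line: []const u8) !struct { first: u21, last: u21, value: []const u8 } {
    // Everything after '#' is a comment.
    const body = if (std.mem.indexOfScalar(u8, line, '#')) |i| line[0..i] else line;
    var fields = std.mem.splitScalar(u8, body, ';');
    const range = std.mem.trim(u8, fields.next().?, " ");
    const value = std.mem.trim(u8, fields.next() orelse return error.BadLine, " ");

    // A range is "XXXX..YYYY"; a single code point is just "XXXX".
    if (std.mem.indexOf(u8, range, "..")) |dots| return .{
        .first = try std.fmt.parseInt(u21, range[0..dots], 16),
        .last = try std.fmt.parseInt(u21, range[dots + 2 ..], 16),
        .value = value,
    };
    const cp = try std.fmt.parseInt(u21, range, 16);
    return .{ .first = cp, .last = cp, .value = value };
}

test parseLine {
    const r = try parseLine("0030..0039    ; Decimal # Nd  [10] DIGIT ZERO..DIGIT NINE");
    try std.testing.expectEqual(@as(u21, 0x30), r.first);
    try std.testing.expectEqual(@as(u21, 0x39), r.last);
    try std.testing.expectEqualStrings("Decimal", r.value);
}
```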
66
deps/zg/src/CanonData.zig
vendored
Normal file
@@ -0,0 +1,66 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
nfc: std.AutoHashMap([2]u21, u21),
nfd: [][]u21 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfc = std.AutoHashMap([2]u21, u21).init(allocator),
        .nfd = try allocator.alloc([]u21, 0x110000),
    };

    var slices: usize = 0;
    errdefer {
        self.nfc.deinit();
        for (self.nfd[0..slices]) |slice| self.allocator.free(slice);
        self.allocator.free(self.nfd);
    }

    @memset(self.nfd, &.{});

    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfd[cp] = try allocator.alloc(u21, len - 1);
        slices += 1;
        for (0..len - 1) |i| {
            self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
        if (len == 3) {
            try self.nfc.put(self.nfd[cp][0..2].*, @intCast(cp));
        }
    }

    return self;
}

pub fn deinit(self: *Self) void {
    self.nfc.deinit();
    for (self.nfd) |slice| self.allocator.free(slice);
    self.allocator.free(self.nfd);
}

/// Returns the canonical decomposition for `cp`.
pub fn toNfd(self: Self, cp: u21) []const u21 {
    return self.nfd[cp];
}

/// Returns the primary composite for the code points in `cps`.
pub fn toNfc(self: Self, cps: [2]u21) ?u21 {
    return self.nfc.get(cps);
}
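Unlike most of the other data modules, CanonData.zig ships no inline tests, so a brief usage sketch may help. The import name `CanonData` is an assumption about how the module is exposed in zg's build.zig:

```zig
const std = @import("std");
// Assumed module name; the actual wiring lives in zg's build.zig.
const CanonData = @import("CanonData");

test "canonical decomposition and composition (sketch)" {
    var data = try CanonData.init(std.testing.allocator);
    defer data.deinit();

    // U+00E9 é decomposes canonically to U+0065 'e' + U+0301 COMBINING ACUTE ACCENT...
    try std.testing.expectEqualSlices(u21, &.{ 0x65, 0x301 }, data.toNfd(0xE9));
    // ...and that pair composes back to the primary composite.
    try std.testing.expectEqual(@as(?u21, 0xE9), data.toNfc(.{ 0x65, 0x301 }));
}
```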
202
deps/zg/src/CaseData.zig
vendored
Normal file
@@ -0,0 +1,202 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;

const CodePointIterator = @import("code_point").Iterator;

allocator: mem.Allocator,
case_map: [][2]u21,
prop_s1: []u16 = undefined,
prop_s2: []u8 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .case_map = try allocator.alloc([2]u21, 0x110000),
    };
    errdefer allocator.free(self.case_map);

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        self.case_map[cp] = .{ cp, cp };
    }

    // Uppercase
    const upper_bytes = @embedFile("upper");
    var upper_fbs = std.io.fixedBufferStream(upper_bytes);
    var upper_decomp = decompressor(.raw, upper_fbs.reader());
    var upper_reader = upper_decomp.reader();

    while (true) {
        const cp = try upper_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try upper_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][0] = @intCast(cp + diff);
    }

    // Lowercase
    const lower_bytes = @embedFile("lower");
    var lower_fbs = std.io.fixedBufferStream(lower_bytes);
    var lower_decomp = decompressor(.raw, lower_fbs.reader());
    var lower_reader = lower_decomp.reader();

    while (true) {
        const cp = try lower_reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try lower_reader.readInt(i24, endian);
        self.case_map[@intCast(cp)][1] = @intCast(cp + diff);
    }

    // Case properties
    const cp_bytes = @embedFile("case_prop");
    var cp_fbs = std.io.fixedBufferStream(cp_bytes);
    var cp_decomp = decompressor(.raw, cp_fbs.reader());
    var cp_reader = cp_decomp.reader();

    const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (0..stage_1_len) |i| self.prop_s1[i] = try cp_reader.readInt(u16, endian);

    const stage_2_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    _ = try cp_reader.readAll(self.prop_s2);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.case_map);
    self.allocator.free(self.prop_s1);
    self.allocator.free(self.prop_s2);
}

/// Returns true if `cp` is either upper, lower, or title case.
pub fn isCased(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

/// Returns true if `cp` is uppercase.
pub fn isUpper(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}

/// Returns true if `str` is all uppercase.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    return while (iter.next()) |cp| {
        if (self.isCased(cp.code) and !self.isUpper(cp.code)) break false;
    } else true;
}

test "isUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isUpperStr("hello, world 2112!"));
    try testing.expect(!cd.isUpperStr("Hello, World 2112!"));
}

/// Returns uppercase mapping for `cp`.
pub fn toUpper(self: Self, cp: u21) u21 {
    return self.case_map[cp][0];
}

/// Returns a new string with all letters in uppercase.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var bytes = std.ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var iter = CodePointIterator{ .bytes = str };
    var buf: [4]u8 = undefined;

    while (iter.next()) |cp| {
        const len = try unicode.utf8Encode(self.toUpper(cp.code), &buf);
        try bytes.appendSlice(buf[0..len]);
    }

    return try bytes.toOwnedSlice();
}

test "toUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(uppered);
    try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered);
}

/// Returns true if `cp` is lowercase.
pub fn isLower(self: Self, cp: u21) bool {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}

/// Returns true if `str` is all lowercase.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    return while (iter.next()) |cp| {
        if (self.isCased(cp.code) and !self.isLower(cp.code)) break false;
    } else true;
}

test "isLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    try testing.expect(cd.isLowerStr("hello, world 2112!"));
    try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isLowerStr("Hello, World 2112!"));
}

/// Returns lowercase mapping for `cp`.
pub fn toLower(self: Self, cp: u21) u21 {
    return self.case_map[cp][1];
}

/// Returns a new string with all letters in lowercase.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var bytes = std.ArrayList(u8).init(allocator);
    defer bytes.deinit();

    var iter = CodePointIterator{ .bytes = str };
    var buf: [4]u8 = undefined;

    while (iter.next()) |cp| {
        const len = try unicode.utf8Encode(self.toLower(cp.code), &buf);
        try bytes.appendSlice(buf[0..len]);
    }

    return try bytes.toOwnedSlice();
}

test "toLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit();

    const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(lowered);
    try testing.expectEqualStrings("hello, world 2112!", lowered);
}
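The tests above cover the string-level API; the code point level mappings work the same way. A small sketch, again assuming the `CaseData` module name from zg's build.zig:

```zig
const std = @import("std");
// Assumed module name; the actual wiring lives in zg's build.zig.
const CaseData = @import("CaseData");

test "code point case queries (sketch)" {
    const cd = try CaseData.init(std.testing.allocator);
    defer cd.deinit();

    try std.testing.expectEqual(@as(u21, 'A'), cd.toUpper('a'));
    try std.testing.expectEqual(@as(u21, 'a'), cd.toLower('A'));
    // Digits are not cased, which is why isUpperStr/isLowerStr ignore them.
    try std.testing.expect(!cd.isCased('1'));
}
```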
189
deps/zg/src/CaseFold.zig
vendored
Normal file
@@ -0,0 +1,189 @@
const std = @import("std");
const mem = std.mem;
const testing = std.testing;

const ascii = @import("ascii");
pub const FoldData = @import("FoldData");
const Normalize = @import("Normalize");

fold_data: *const FoldData,

const Self = @This();

/// Produces the case folded code points for `cps`. Caller must free returned
/// slice with `allocator`.
pub fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]const u21 {
    var cfcps = std.ArrayList(u21).init(allocator);
    defer cfcps.deinit();
    var buf: [3]u21 = undefined;

    for (cps) |cp| {
        const cf = self.fold_data.caseFold(cp, &buf);

        if (cf.len == 0) {
            try cfcps.append(cp);
        } else {
            try cfcps.appendSlice(cf);
        }
    }

    return try cfcps.toOwnedSlice();
}

fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    return for (cps) |cp| {
        if (self.fold_data.changesWhenCaseFolded(cp)) break true;
    } else false;
}

/// Performs compatibility caseless string matching by decomposing to NFKD. This
/// is the most comprehensive comparison possible, but slower than `canonCaselessMatch`.
pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);

    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);

    const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a);
    defer allocator.free(nfkd_cf_nfd_a);
    const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
    defer allocator.free(cf_nfkd_cf_nfd_a);
    const nfkd_cf_nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);

    // Process b
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);

    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);

    const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b);
    defer allocator.free(nfkd_cf_nfd_b);
    const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);
    defer allocator.free(cf_nfkd_cf_nfd_b);
    const nfkd_cf_nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
    defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);

    return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
}

test "compatCaselessMatch" {
    const allocator = testing.allocator;

    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };

    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));

    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b));

    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
}

/// Performs canonical caseless string matching by decomposing to NFD. This is
/// faster than `compatCaselessMatch`, but less comprehensive.
pub fn canonCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalize,
    a: []const u8,
    b: []const u8,
) !bool {
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);

    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);

    var need_free_nfd_cf_nfd_a = false;
    var nfd_cf_nfd_a = cf_nfd_a;
    if (!need_free_cf_nfd_a) {
        nfd_cf_nfd_a = try normalizer.nfdCodePoints(allocator, cf_nfd_a);
        need_free_nfd_cf_nfd_a = true;
    }
    defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);

    // Process b
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);

    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);

    var need_free_nfd_cf_nfd_b = false;
    var nfd_cf_nfd_b = cf_nfd_b;
    if (!need_free_cf_nfd_b) {
        nfd_cf_nfd_b = try normalizer.nfdCodePoints(allocator, cf_nfd_b);
        need_free_nfd_cf_nfd_b = true;
    }
    defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);

    return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b);
}

test "canonCaselessMatch" {
    const allocator = testing.allocator;

    const norm_data = try Normalize.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalize{ .norm_data = &norm_data };

    const fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    try testing.expect(try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));

    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(!try caser.canonCaselessMatch(allocator, &n, a, b));

    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.canonCaselessMatch(allocator, &n, a, c));
}
49
deps/zg/src/CombiningData.zig
vendored
Normal file
@@ -0,0 +1,49 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u8 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("ccc");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.s2);
    _ = try reader.readAll(self.s2);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns the canonical combining class for a code point.
pub fn ccc(self: Self, cp: u21) u8 {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
}

/// True if `cp` is a starter code point, not a combining character.
pub fn isStarter(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0;
}
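Both lookups above use the same two-stage table: the high byte of the code point selects a block in `s1`, and the low byte indexes into `s2`. A short usage sketch, assuming the `CombiningData` module name from zg's build.zig:

```zig
const std = @import("std");
// Assumed module name; the actual wiring lives in zg's build.zig.
const CombiningData = @import("CombiningData");

test "canonical combining class (sketch)" {
    const data = try CombiningData.init(std.testing.allocator);
    defer data.deinit();

    // 'A' is a starter (ccc 0); U+0301 COMBINING ACUTE ACCENT has ccc 230.
    try std.testing.expect(data.isStarter('A'));
    try std.testing.expectEqual(@as(u8, 230), data.ccc(0x301));
}
```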
50
deps/zg/src/CompatData.zig
vendored
Normal file
@@ -0,0 +1,50 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
nfkd: [][]u21 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfkd = try allocator.alloc([]u21, 0x110000),
    };
    errdefer self.deinit();

    @memset(self.nfkd, &.{});

    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfkd[cp] = try allocator.alloc(u21, len - 1);
        for (0..len - 1) |i| {
            self.nfkd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
    }

    return self;
}

pub fn deinit(self: *const Self) void {
    for (self.nfkd) |slice| {
        if (slice.len != 0) self.allocator.free(slice);
    }
    self.allocator.free(self.nfkd);
}

/// Returns compatibility decomposition for `cp`.
pub fn toNfkd(self: Self, cp: u21) []u21 {
    return self.nfkd[cp];
}
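A short usage sketch for the compatibility mappings, assuming the `CompatData` module name from zg's build.zig:

```zig
const std = @import("std");
// Assumed module name; the actual wiring lives in zg's build.zig.
const CompatData = @import("CompatData");

test "compatibility decomposition (sketch)" {
    const data = try CompatData.init(std.testing.allocator);
    defer data.deinit();

    // U+FB01 LATIN SMALL LIGATURE FI decomposes to "fi" under NFKD.
    try std.testing.expectEqualSlices(u21, &.{ 'f', 'i' }, data.toNfkd(0xFB01));
    // Code points without a compatibility mapping yield an empty slice.
    try std.testing.expectEqual(@as(usize, 0), data.toNfkd('a').len);
}
```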
355
deps/zg/src/DisplayWidth.zig
vendored
Normal file
@@ -0,0 +1,355 @@
const std = @import("std");
const builtin = @import("builtin");
const ArrayList = std.ArrayList;
const mem = std.mem;
const simd = std.simd;
const testing = std.testing;

const ascii = @import("ascii");
const CodePointIterator = @import("code_point").Iterator;
const GraphemeIterator = @import("grapheme").Iterator;
pub const DisplayWidthData = @import("DisplayWidthData");

data: *const DisplayWidthData,

const Self = @This();

/// strWidth returns the total display width of `str` as the number of cells
/// required in a fixed-pitch font (i.e. a terminal screen).
pub fn strWidth(self: Self, str: []const u8) usize {
    var total: isize = 0;

    // ASCII fast path
    if (ascii.isAsciiOnly(str)) {
        for (str) |b| total += self.data.codePointWidth(b);
        return @intCast(@max(0, total));
    }

    var giter = GraphemeIterator.init(str, &self.data.g_data);

    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
        var gc_total: isize = 0;

        while (cp_iter.next()) |cp| {
            var w = self.data.codePointWidth(cp.code);

            if (w != 0) {
                // Handle variation selectors that change emoji presentation.
                if (cp_iter.next()) |ncp| {
                    // VS15 selects text presentation; VS16 selects emoji presentation.
                    if (ncp.code == 0xFE0E) w = 1;
                    if (ncp.code == 0xFE0F) w = 2;
                }

                // Only add the width of the first non-zero-width code point.
                if (gc_total == 0) {
                    gc_total = w;
                    break;
                }
            }
        }

        total += gc_total;
    }

    return @intCast(@max(0, total));
}

test "strWidth" {
    const data = try DisplayWidthData.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector
    try testing.expectEqual(@as(usize, 0), self.strWidth("A\x08")); // Backspace
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA")); // DEL
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA\x08\x08")); // never less than 0

    // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
    const empty = "";
    try testing.expectEqual(@as(usize, 0), self.strWidth(empty));
    const with_null = "hello\x00world";
    try testing.expectEqual(@as(usize, 10), self.strWidth(with_null));
    const hello_jp = "コンニチハ, セカイ!";
    try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp));
    const control = "\x1b[0m";
    try testing.expectEqual(@as(usize, 3), self.strWidth(control));
    const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
    try testing.expectEqual(@as(usize, 3), self.strWidth(balinese));

    // These commented out tests require a new specification for complex scripts.
    // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    // const jamo = "\u{1100}\u{1160}";
    // try testing.expectEqual(@as(usize, 3), strWidth(jamo));
    // const devengari = "\u{0915}\u{094D}\u{0937}\u{093F}";
    // try testing.expectEqual(@as(usize, 3), strWidth(devengari));
    // const tamal = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}";
    // try testing.expectEqual(@as(usize, 5), strWidth(tamal));
    // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}";
    // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
    // The following passes, but only by coincidence.
    const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
    try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2));

    // From Rust https://github.com/jameslanska/unicode-display-width
    try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("🦀"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👨‍👩‍👧‍👧"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("👩‍🔬"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("sane text"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
    try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}"));
}

/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
/// If the display width of `str` and `total_width` have different parity, the right side of `str`
/// receives one additional pad. This makes sure the returned string fills the requested width.
/// Caller must free returned bytes with `allocator`.
pub fn center(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    const margin_width = @divFloor((total_width - str_width), 2);
    if (pad_width > margin_width) return error.PadTooLong;
    const extra_pad: usize = if (total_width % 2 != str_width % 2) 1 else 0;
    const pads = @divFloor(margin_width, pad_width) * 2 + extra_pad;

    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;

    while (pads_index < pads / 2) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;

    pads_index = 0;
    while (pads_index < pads / 2 + extra_pad) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    return result;
}

test "center" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Input and width both have odd length
    var centered = try self.center(allocator, "abc", 9, "*");
    try testing.expectEqualSlices(u8, "***abc***", centered);

    // Input and width both have even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "---w😊w---", centered);

    // Input has even length, width has odd length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "1234", 9, "-");
    try testing.expectEqualSlices(u8, "--1234---", centered);

    // Input has odd length, width has even length
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 8, "-");
    try testing.expectEqualSlices(u8, "--123---", centered);

    // Input is the same length as the width
    testing.allocator.free(centered);
    centered = try self.center(allocator, "123", 3, "-");
    try testing.expectEqualSlices(u8, "123", centered);

    // Input is empty
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 3, "-");
    try testing.expectEqualSlices(u8, "---", centered);

    // Input is empty and width is zero
    testing.allocator.free(centered);
    centered = try self.center(allocator, "", 0, "-");
    try testing.expectEqualSlices(u8, "", centered);

    // Input is longer than the width, which is an error
    testing.allocator.free(centered);
    try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-"));
}

/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the left side. Caller must free returned bytes with `allocator`.
pub fn padLeft(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = @divFloor(margin_width, pad_width);

    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;

    while (pads_index < pads) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    @memcpy(result[bytes_index..][0..str.len], str);

    return result;
}

test "padLeft" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    var right_aligned = try self.padLeft(allocator, "abc", 9, "*");
    defer testing.allocator.free(right_aligned);
    try testing.expectEqualSlices(u8, "******abc", right_aligned);

    testing.allocator.free(right_aligned);
    right_aligned = try self.padLeft(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
}

/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding
/// on the right side. Caller must free returned bytes with `allocator`.
pub fn padRight(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;

    const pad_width = self.strWidth(pad);
    if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = @divFloor(margin_width, pad_width);

    var result = try allocator.alloc(u8, pads * pad.len + str.len);
    var bytes_index: usize = 0;
    var pads_index: usize = 0;

    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;

    while (pads_index < pads) : (pads_index += 1) {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    return result;
}

test "padRight" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    var left_aligned = try self.padRight(allocator, "abc", 9, "*");
    defer testing.allocator.free(left_aligned);
    try testing.expectEqualSlices(u8, "abc******", left_aligned);

    testing.allocator.free(left_aligned);
    left_aligned = try self.padRight(allocator, "w😊w", 10, "-");
    try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
}

/// Wraps a string approximately at the given number of columns per line.
/// `threshold` defines how far the last column of the last word can be
/// from the edge. Caller must free returned bytes with `allocator`.
pub fn wrap(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += self.strWidth(word) + 1;

            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Remove trailing space and newline.
    _ = result.pop();
    _ = result.pop();

    return try result.toOwnedSlice();
}

test "wrap" {
    const allocator = testing.allocator;
    const data = try DisplayWidthData.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    const input = "The quick brown fox\r\njumped over the lazy dog!";
    const got = try self.wrap(allocator, input, 10, 3);
    defer testing.allocator.free(got);
    const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";
    try testing.expectEqualStrings(want, got);
}
98
deps/zg/src/FoldData.zig
vendored
Normal file
@@ -0,0 +1,98 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
cutoff: u21 = undefined,
cwcf_exceptions_min: u21 = undefined,
cwcf_exceptions_max: u21 = undefined,
cwcf_exceptions: []u21 = undefined,
multiple_start: u21 = undefined,
stage1: []u8 = undefined,
stage2: []u8 = undefined,
stage3: []i24 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("fold");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };
    self.cutoff = @intCast(try reader.readInt(u24, endian));
    self.multiple_start = @intCast(try reader.readInt(u24, endian));

    var len = try reader.readInt(u16, endian);
    self.stage1 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage1);
    for (0..len) |i| self.stage1[i] = try reader.readInt(u8, endian);

    len = try reader.readInt(u16, endian);
    self.stage2 = try allocator.alloc(u8, len);
    errdefer allocator.free(self.stage2);
    for (0..len) |i| self.stage2[i] = try reader.readInt(u8, endian);

    len = try reader.readInt(u16, endian);
    self.stage3 = try allocator.alloc(i24, len);
    errdefer allocator.free(self.stage3);
    for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian);

    self.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian));
    self.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian));
    len = try reader.readInt(u16, endian);
    self.cwcf_exceptions = try allocator.alloc(u21, len);
    errdefer allocator.free(self.cwcf_exceptions);
    for (0..len) |i| self.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.stage1);
    self.allocator.free(self.stage2);
    self.allocator.free(self.stage3);
    self.allocator.free(self.cwcf_exceptions);
}

/// Returns the case fold for `cp`.
pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 {
    if (cp >= self.cutoff) return &.{};

    const stage1_val = self.stage1[cp >> 8];
    if (stage1_val == 0) return &.{};

    const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF);
    const stage3_index = self.stage2[stage2_index];

    if (stage3_index & 0x80 != 0) {
        const real_index = @as(usize, self.multiple_start) + (stage3_index ^ 0x80) * 3;
        const mapping = mem.sliceTo(self.stage3[real_index..][0..3], 0);
        for (mapping, 0..) |c, i| buf[i] = @intCast(c);

        return buf[0..mapping.len];
    }

    const offset = self.stage3[stage3_index];
    if (offset == 0) return &.{};

    buf[0] = @intCast(@as(i32, cp) + offset);

    return buf[0..1];
}

/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
pub fn changesWhenCaseFolded(self: Self, cp: u21) bool {
    var buf: [3]u21 = undefined;
    const has_mapping = self.caseFold(cp, &buf).len != 0;
    return has_mapping and !self.isCwcfException(cp);
}

fn isCwcfException(self: Self, cp: u21) bool {
    return cp >= self.cwcf_exceptions_min and
        cp <= self.cwcf_exceptions_max and
        std.mem.indexOfScalar(u21, self.cwcf_exceptions, cp) != null;
}
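`caseFold` returns an empty slice when a code point folds to itself, which is the sentinel that `CaseFold.caseFold` above relies on. A usage sketch, assuming the `FoldData` module name from zg's build.zig:

```zig
const std = @import("std");
// Assumed module name; the actual wiring lives in zg's build.zig.
const FoldData = @import("FoldData");

test "case folding (sketch)" {
    const data = try FoldData.init(std.testing.allocator);
    defer data.deinit();

    var buf: [3]u21 = undefined;
    // Simple fold: 'A' folds to 'a'.
    try std.testing.expectEqualSlices(u21, &.{'a'}, data.caseFold('A', &buf));
    // Full fold: U+00DF ß folds to "ss".
    try std.testing.expectEqualSlices(u21, &.{ 's', 's' }, data.caseFold(0xDF, &buf));
    // Code points that fold to themselves return an empty slice.
    try std.testing.expectEqual(@as(usize, 0), data.caseFold('a', &buf).len);
}
```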
171
deps/zg/src/GenCatData.zig
vendored
Normal file
@@ -0,0 +1,171 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

/// General Category
pub const Gc = enum {
    Cc, // Other, Control
    Cf, // Other, Format
    Cn, // Other, Unassigned
    Co, // Other, Private Use
    Cs, // Other, Surrogate
    Ll, // Letter, Lowercase
    Lm, // Letter, Modifier
    Lo, // Letter, Other
    Lu, // Letter, Uppercase
    Lt, // Letter, Titlecase
    Mc, // Mark, Spacing Combining
    Me, // Mark, Enclosing
    Mn, // Mark, Non-Spacing
    Nd, // Number, Decimal Digit
    Nl, // Number, Letter
    No, // Number, Other
    Pc, // Punctuation, Connector
    Pd, // Punctuation, Dash
    Pe, // Punctuation, Close
    Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Punctuation, Other
    Ps, // Punctuation, Open
    Sc, // Symbol, Currency
    Sk, // Symbol, Modifier
    Sm, // Symbol, Math
    So, // Symbol, Other
    Zl, // Separator, Line
    Zp, // Separator, Paragraph
    Zs, // Separator, Space
};

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u5 = undefined,
s3: []u5 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gencat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u5, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u5, s3_len);
    errdefer allocator.free(self.s3);
    for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}

/// Lookup the General Category for `cp`.
pub fn gc(self: Self, cp: u21) Gc {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
}
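
// Illustrative test (added; not part of the vendored file): exercises the
// three-stage table lookup above on a couple of well-known code points.
test "gc" {
    const self = try init(std.testing.allocator);
    defer self.deinit();

    try std.testing.expectEqual(Gc.Lu, self.gc('A'));
    try std.testing.expectEqual(Gc.Nd, self.gc('9'));
    try std.testing.expect(self.isLetter('z'));
}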

/// True if `cp` has a C general category.
pub fn isControl(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Cc,
        .Cf,
        .Cn,
        .Co,
        .Cs,
        => true,
        else => false,
    };
}

/// True if `cp` has an L general category.
pub fn isLetter(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Ll,
        .Lm,
        .Lo,
        .Lu,
        .Lt,
        => true,
        else => false,
    };
}

/// True if `cp` has an M general category.
pub fn isMark(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Mc,
        .Me,
        .Mn,
        => true,
        else => false,
    };
}

/// True if `cp` has an N general category.
pub fn isNumber(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Nd,
        .Nl,
        .No,
        => true,
        else => false,
    };
}

/// True if `cp` has a P general category.
pub fn isPunctuation(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Pc,
        .Pd,
        .Pe,
        .Pf,
        .Pi,
        .Po,
        .Ps,
        => true,
        else => false,
    };
}

/// True if `cp` has an S general category.
pub fn isSymbol(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Sc,
        .Sk,
        .Sm,
        .So,
        => true,
        else => false,
    };
}

/// True if `cp` has a Z general category.
pub fn isSeparator(self: Self, cp: u21) bool {
    return switch (self.gc(cp)) {
        .Zl,
        .Zp,
        .Zs,
        => true,
        else => false,
    };
}
88
deps/zg/src/GraphemeData.zig
vendored
Normal file
@@ -0,0 +1,88 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

/// Indic syllable type.
pub const Indic = enum {
    none,

    Consonant,
    Extend,
    Linker,
};

/// Grapheme break property.
pub const Gbp = enum {
    none,
    Control,
    CR,
    Extend,
    L,
    LF,
    LV,
    LVT,
    Prepend,
    Regional_Indicator,
    SpacingMark,
    T,
    V,
    ZWJ,
};

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u16 = undefined,
s3: []u8 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u16, s2_len);
    errdefer allocator.free(self.s2);
    for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian);

    const s3_len: u16 = try reader.readInt(u16, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    _ = try reader.readAll(self.s3);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}

/// Lookup the grapheme break property for a code point.
pub fn gbp(self: Self, cp: u21) Gbp {
    return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
}

/// Lookup the Indic syllable type for a code point.
pub fn indic(self: Self, cp: u21) Indic {
    return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
}

/// True if `cp` is an extended pictographic (emoji) code point.
pub fn isEmoji(self: Self, cp: u21) bool {
    return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
}
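
// Illustrative test (added; not part of the vendored file): the three
// lookups above decode one packed byte per code point, with the grapheme
// break property in the high nibble, the Indic type in bits 1-3, and the
// emoji flag in bit 0.
test "packed property byte" {
    const self = try init(std.testing.allocator);
    defer self.deinit();

    try std.testing.expect(self.gbp('\r') == .CR);
    try std.testing.expect(self.gbp('\n') == .LF);
    try std.testing.expect(self.isEmoji('😊'));
}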
53
deps/zg/src/HangulData.zig
vendored
Normal file
@@ -0,0 +1,53 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

pub const Syllable = enum {
    none,
    L,
    LV,
    LVT,
    V,
    T,
};

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u3 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("hangul");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u3, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns the Hangul syllable type for `cp`.
pub fn syllable(self: Self, cp: u21) Syllable {
    return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]);
}
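
// Illustrative test (added; not part of the vendored file): U+AC00 is the
// first precomposed Hangul syllable and has syllable type LV; non-Hangul
// code points map to .none.
test "syllable" {
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expectEqual(Syllable.LV, self.syllable('\u{AC00}'));
    try testing.expectEqual(Syllable.none, self.syllable('A'));
}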
37
deps/zg/src/NormData.zig
vendored
Normal file
@@ -0,0 +1,37 @@
const std = @import("std");
const mem = std.mem;

const CanonData = @import("CanonData");
const CccData = @import("CombiningData");
const CompatData = @import("CompatData");
const FoldData = @import("FoldData");
const HangulData = @import("HangulData");
const NormPropsData = @import("NormPropsData");

canon_data: CanonData = undefined,
ccc_data: CccData = undefined,
compat_data: CompatData = undefined,
hangul_data: HangulData = undefined,
normp_data: NormPropsData = undefined,

const Self = @This();

pub fn init(self: *Self, allocator: std.mem.Allocator) !void {
    self.canon_data = try CanonData.init(allocator);
    errdefer self.canon_data.deinit();
    self.ccc_data = try CccData.init(allocator);
    errdefer self.ccc_data.deinit();
    self.compat_data = try CompatData.init(allocator);
    errdefer self.compat_data.deinit();
    self.hangul_data = try HangulData.init(allocator);
    errdefer self.hangul_data.deinit();
    self.normp_data = try NormPropsData.init(allocator);
}

pub fn deinit(self: *Self) void {
    self.canon_data.deinit();
    self.ccc_data.deinit();
    self.compat_data.deinit();
    self.hangul_data.deinit();
    self.normp_data.deinit();
}
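
// Illustrative test (added; not part of the vendored file): unlike its
// sibling data files, `init` here fills in a caller-provided value instead
// of returning one, so usage looks like this.
test "init/deinit" {
    var data: Self = undefined;
    try init(&data, std.testing.allocator);
    defer data.deinit();
}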
54
deps/zg/src/NormPropsData.zig
vendored
Normal file
@@ -0,0 +1,54 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u4 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("normp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{ .allocator = allocator };

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u4, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
}

/// Returns true if `cp` is already in NFD form.
pub fn isNfd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0;
}

/// Returns true if `cp` is already in NFKD form.
pub fn isNfkd(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0;
}

/// Returns true if `cp` is not allowed in any normalized form.
pub fn isFcx(self: Self, cp: u21) bool {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}
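
// Illustrative test (added; not part of the vendored file): ASCII is already
// in both decomposed forms, while U+00E9 (é) has a canonical decomposition
// and so is in neither.
test "quick checks" {
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expect(self.isNfd('a'));
    try testing.expect(self.isNfkd('a'));
    try testing.expect(!self.isNfd('\u{e9}'));
}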
622
deps/zg/src/Normalize.zig
vendored
Normal file
@@ -0,0 +1,622 @@
//! Normalizer contains functions and methods that implement
//! Unicode Normalization. You can normalize strings into NFC,
//! NFKC, NFD, and NFKD normalization forms.

const std = @import("std");
const debug = std.debug;
const assert = debug.assert;
const fmt = std.fmt;
const heap = std.heap;
const mem = std.mem;
const simd = std.simd;
const testing = std.testing;
const unicode = std.unicode;

const ascii = @import("ascii");
const CodePointIterator = @import("code_point").Iterator;
pub const NormData = @import("NormData");

norm_data: *const NormData,

const Self = @This();

const SBase: u21 = 0xAC00;
const LBase: u21 = 0x1100;
const VBase: u21 = 0x1161;
const TBase: u21 = 0x11A7;
const LCount: u21 = 19;
const VCount: u21 = 21;
const TCount: u21 = 28;
const NCount: u21 = 588; // VCount * TCount
const SCount: u21 = 11172; // LCount * NCount

fn decomposeHangul(self: Self, cp: u21, buf: []u21) ?Decomp {
    const kind = self.norm_data.hangul_data.syllable(cp);
    if (kind != .LV and kind != .LVT) return null;

    const SIndex: u21 = cp - SBase;
    const LIndex: u21 = SIndex / NCount;
    const VIndex: u21 = (SIndex % NCount) / TCount;
    const TIndex: u21 = SIndex % TCount;
    const LPart: u21 = LBase + LIndex;
    const VPart: u21 = VBase + VIndex;

    var dc = Decomp{ .form = .nfd };
    buf[0] = LPart;
    buf[1] = VPart;

    if (TIndex == 0) {
        dc.cps = buf[0..2];
        return dc;
    }

    // TPart
    buf[2] = TBase + TIndex;
    dc.cps = buf[0..3];
    return dc;
}
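
// Worked example (added for illustration; not part of the vendored file).
// For U+AC01 (각): SIndex = 0xAC01 - 0xAC00 = 1, so LIndex = 0, VIndex = 0,
// and TIndex = 1, yielding the NFD sequence U+1100, U+1161, U+11A8.
test "decomposeHangul worked example" {
    var data: NormData = undefined;
    try NormData.init(&data, testing.allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    var buf: [3]u21 = undefined;
    const dc = n.decomposeHangul('\u{AC01}', &buf).?;
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 0x1100, 0x1161, 0x11A8 }, dc.cps);
}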

fn composeHangulCanon(lv: u21, t: u21) u21 {
    assert(0x11A8 <= t and t <= 0x11C2);
    return lv + (t - TBase);
}

fn composeHangulFull(l: u21, v: u21, t: u21) u21 {
    assert(0x1100 <= l and l <= 0x1112);
    assert(0x1161 <= v and v <= 0x1175);
    const LIndex = l - LBase;
    const VIndex = v - VBase;
    const LVIndex = LIndex * NCount + VIndex * TCount;

    if (t == 0) return SBase + LVIndex;

    assert(0x11A8 <= t and t <= 0x11C2);
    const TIndex = t - TBase;

    return SBase + LVIndex + TIndex;
}

const Form = enum {
    nfc,
    nfd,
    nfkc,
    nfkd,
    same,
};

const Decomp = struct {
    form: Form = .same,
    cps: []const u21 = &.{},
};

// `mapping` retrieves the decomposition mapping for a code point as per the UCD.
fn mapping(self: Self, cp: u21, form: Form) Decomp {
    var dc = Decomp{};

    switch (form) {
        .nfd => {
            dc.cps = self.norm_data.canon_data.toNfd(cp);
            if (dc.cps.len != 0) dc.form = .nfd;
        },

        .nfkd => {
            dc.cps = self.norm_data.compat_data.toNfkd(cp);
            if (dc.cps.len != 0) {
                dc.form = .nfkd;
            } else {
                dc.cps = self.norm_data.canon_data.toNfd(cp);
                if (dc.cps.len != 0) dc.form = .nfkd;
            }
        },

        else => @panic("Normalizer.mapping only accepts form .nfd or .nfkd."),
    }

    return dc;
}

// `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`.
fn decompose(
    self: Self,
    cp: u21,
    form: Form,
    buf: []u21,
) Decomp {
    // ASCII
    if (cp < 128) return .{};

    // NFD / NFKD quick checks.
    switch (form) {
        .nfd => if (self.norm_data.normp_data.isNfd(cp)) return .{},
        .nfkd => if (self.norm_data.normp_data.isNfkd(cp)) return .{},
        else => @panic("Normalizer.decompose only accepts form .nfd or .nfkd."),
    }

    // Hangul precomposed syllable full decomposition.
    if (self.decomposeHangul(cp, buf)) |dc| return dc;

    // Full decomposition.
    var dc = Decomp{ .form = form };

    var result_index: usize = 0;
    var work_index: usize = 1;

    // Start work with argument code point.
    var work = [_]u21{cp} ++ [_]u21{0} ** 17;

    while (work_index > 0) {
        // Look at previous code point in work queue.
        work_index -= 1;
        const next = work[work_index];
        const m = self.mapping(next, form);

        // No more decompositions for this code point.
        if (m.form == .same) {
            buf[result_index] = next;
            result_index += 1;
            continue;
        }

        // Work backwards through the decomposition.
        // `i` starts at 1 because we index `m.cps` from its end.
        var i: usize = 1;
        while (i <= m.cps.len) : ({
            i += 1;
            work_index += 1;
        }) {
            work[work_index] = m.cps[m.cps.len - i];
        }
    }

    dc.cps = buf[0..result_index];

    return dc;
}

test "decompose" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    var n = Self{ .norm_data = &data };

    var buf: [18]u21 = undefined;

    var dc = n.decompose('é', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]);

    dc = n.decompose('\u{1e0a}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);

    dc = n.decompose('\u{1e0a}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);

    dc = n.decompose('\u{3189}', .nfd, &buf);
    try testing.expect(dc.form == .same);
    try testing.expect(dc.cps.len == 0);

    dc = n.decompose('\u{3189}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]);

    dc = n.decompose('\u{ace1}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);

    dc = n.decompose('\u{ace1}', .nfkd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);

    dc = n.decompose('\u{3d3}', .nfd, &buf);
    try testing.expect(dc.form == .nfd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]);

    dc = n.decompose('\u{3d3}', .nfkd, &buf);
    try testing.expect(dc.form == .nfkd);
    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]);
}

/// Returned from various functions in this namespace. Remember to call `deinit` to free any allocated memory.
pub const Result = struct {
    allocator: ?mem.Allocator = null,
    slice: []const u8,

    pub fn deinit(self: *const Result) void {
        if (self.allocator) |allocator| allocator.free(self.slice);
    }
};

// Compares code points by Canonical Combining Class order.
fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
    return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs);
}

// Applies the Canonical Sorting Algorithm.
fn canonicalSort(self: Self, cps: []u21) void {
    var i: usize = 0;
    while (i < cps.len) : (i += 1) {
        const start: usize = i;
        while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
        mem.sort(u21, cps[start..i], self, cccLess);
    }
}
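
// Illustrative test (added; not part of the vendored file): runs of code
// points with nonzero CCC are reordered by combining class, e.g. dot below
// (U+0323, CCC 220) sorts before acute (U+0301, CCC 230).
test "canonicalSort" {
    var data: NormData = undefined;
    try NormData.init(&data, testing.allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    var cps = [_]u21{ 'a', '\u{301}', '\u{323}' };
    n.canonicalSort(&cps);
    try testing.expectEqualSlices(u21, &[_]u21{ 'a', '\u{323}', '\u{301}' }, &cps);
}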

/// Normalize `str` to NFD.
pub fn nfd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfd);
}

/// Normalize `str` to NFKD.
pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxd(allocator, str, .nfkd);
}

pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();

    var cp_iter = CodePointIterator{ .bytes = str };
    var dc_buf: [18]u21 = undefined;

    while (cp_iter.next()) |cp| {
        const dc = self.decompose(cp.code, form, &dc_buf);
        if (dc.form == .same) {
            try dcp_list.append(cp.code);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }

    self.canonicalSort(dcp_list.items);

    return try dcp_list.toOwnedSlice();
}

fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };

    const dcps = try self.nfxdCodePoints(allocator, str, form);
    defer allocator.free(dcps);

    var dstr_list = std.ArrayList(u8).init(allocator);
    defer dstr_list.deinit();
    var buf: [4]u8 = undefined;

    for (dcps) |dcp| {
        const len = unicode.utf8Encode(dcp, &buf) catch unreachable;
        try dstr_list.appendSlice(buf[0..len]);
    }

    return Result{ .allocator = allocator, .slice = try dstr_list.toOwnedSlice() };
}

test "nfd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfd(allocator, "Hello World!");
    defer result.deinit();

    try testing.expectEqualStrings("Hello World!", result.slice);
}

test "nfd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();

    try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
}

test "nfkd ASCII / no-alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfkd(allocator, "Hello World!");
    defer result.deinit();

    try testing.expectEqualStrings("Hello World!", result.slice);
}

test "nfkd !ASCII / alloc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
    defer result.deinit();

    try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
}

pub fn nfdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();

    var dc_buf: [18]u21 = undefined;

    for (cps) |cp| {
        const dc = self.decompose(cp, .nfd, &dc_buf);

        if (dc.form == .same) {
            try dcp_list.append(cp);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }

    self.canonicalSort(dcp_list.items);

    return try dcp_list.toOwnedSlice();
}

pub fn nfkdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) mem.Allocator.Error![]u21 {
    var dcp_list = std.ArrayList(u21).init(allocator);
    defer dcp_list.deinit();

    var dc_buf: [18]u21 = undefined;

    for (cps) |cp| {
        const dc = self.decompose(cp, .nfkd, &dc_buf);

        if (dc.form == .same) {
            try dcp_list.append(cp);
        } else {
            try dcp_list.appendSlice(dc.cps);
        }
    }

    self.canonicalSort(dcp_list.items);

    return try dcp_list.toOwnedSlice();
}

// Composition (NFC, NFKC)

fn isHangul(self: Self, cp: u21) bool {
    return cp >= 0x1100 and self.norm_data.hangul_data.syllable(cp) != .none;
}

/// Normalizes `str` to NFC.
pub fn nfc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfc);
}

/// Normalizes `str` to NFKC.
pub fn nfkc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
    return self.nfxc(allocator, str, .nfkc);
}

fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
    // Quick checks.
    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
    if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };

    // Decompose first.
    var dcps = if (form == .nfc)
        try self.nfxdCodePoints(allocator, str, .nfd)
    else
        try self.nfxdCodePoints(allocator, str, .nfkd);
    defer allocator.free(dcps);

    // Compose
    const tombstone = 0xe000; // Start of BMP Private Use Area

    // Loop over all decomposed code points.
    while (true) {
        var i: usize = 1; // start at second code point.
        var deleted: usize = 0;

        // For each code point, C, find the preceding
        // starter code point L, if any.
        block_check: while (i < dcps.len) : (i += 1) {
            const C = dcps[i];
            if (C == tombstone) continue :block_check;
            const cc_C = self.norm_data.ccc_data.ccc(C);
            var starter_index: ?usize = null;
            var j: usize = i;

            // Seek back to find starter L, if any.
            while (true) {
                j -= 1;
                if (dcps[j] == tombstone) continue;

                // Check for starter.
                if (self.norm_data.ccc_data.isStarter(dcps[j])) {
                    // Check for blocking conditions.
                    for (dcps[(j + 1)..i]) |B| {
                        if (B == tombstone) continue;
                        const cc_B = self.norm_data.ccc_data.ccc(B);
                        if (cc_B != 0 and self.isHangul(C)) continue :block_check;
                        if (cc_B >= cc_C) continue :block_check;
                    }

                    // Found starter at j.
                    starter_index = j;
                    break;
                }

                if (j == 0) break;
            }

            // If we have a starter L, see if there's a primary
            // composite, P, for the sequence L, C. If so, we must
            // replace L with P and delete C.
            if (starter_index) |sidx| {
                const L = dcps[sidx];
                var processed_hangul = false;

                // If L and C are Hangul syllables, we can compose
                // them algorithmically if possible.
                if (self.isHangul(L) and self.isHangul(C)) {
                    // Get Hangul syllable types.
                    const l_stype = self.norm_data.hangul_data.syllable(L);
                    const c_stype = self.norm_data.hangul_data.syllable(C);

                    if (l_stype == .LV and c_stype == .T) {
                        // LV, T canonical composition.
                        dcps[sidx] = composeHangulCanon(L, C);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }

                    if (l_stype == .L and c_stype == .V) {
                        // L, V full composition. L, V, T is handled via main loop.
                        dcps[sidx] = composeHangulFull(L, C, 0);
                        dcps[i] = tombstone; // Mark for deletion.
                        processed_hangul = true;
                    }

                    if (processed_hangul) deleted += 1;
                }

                // If no composition has occurred yet.
                if (!processed_hangul) {
                    // L, C are not Hangul, so check for a primary composite
                    // in the Unicode Character Database.
                    if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| {
                        // We have a primary composite P for L, C.
                        // We must check that P is not in the Full
                        // Composition Exclusions (FCX) list,
                        // preventing it from appearing in any
                        // composed form (NFC, NFKC).
                        if (!self.norm_data.normp_data.isFcx(P)) {
                            dcps[sidx] = P;
                            dcps[i] = tombstone; // Mark for deletion.
                            deleted += 1;
                        }
                    }
                }
            }
        }

        // If we have no deletions, the code point sequence
        // has been fully composed.
        if (deleted == 0) {
            var cstr_list = std.ArrayList(u8).init(allocator);
            defer cstr_list.deinit();
            var buf: [4]u8 = undefined;

            for (dcps) |cp| {
                if (cp == tombstone) continue; // "Delete"
                const len = unicode.utf8Encode(cp, &buf) catch unreachable;
                try cstr_list.appendSlice(buf[0..len]);
            }

            return Result{ .allocator = allocator, .slice = try cstr_list.toOwnedSlice() };
        }
    }
}

test "nfc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
    defer result.deinit();

    try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice);
}

test "nfkc" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
    defer result.deinit();

    try testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
}

/// Tests for equality of `a` and `b` after normalizing to NFC.
pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool {
    const norm_result_a = try self.nfc(allocator, a);
    defer norm_result_a.deinit();
    const norm_result_b = try self.nfc(allocator, b);
    defer norm_result_b.deinit();

    return mem.eql(u8, norm_result_a.slice, norm_result_b.slice);
}

test "eql" {
    const allocator = testing.allocator;
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit();
    const n = Self{ .norm_data = &data };

    try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
    try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
}

/// Returns true if `str` only contains Latin-1 Supplement
/// code points. Uses SIMD if possible.
pub fn isLatin1Only(str: []const u8) bool {
    var cp_iter = CodePointIterator{ .bytes = str };

    const vec_len = simd.suggestVectorLength(u21) orelse return blk: {
        break :blk while (cp_iter.next()) |cp| {
            if (cp.code > 255) break false;
        } else true;
    };

    const Vec = @Vector(vec_len, u21);

    outer: while (true) {
        var v1: Vec = undefined;
        const saved_cp_i = cp_iter.i;

        for (0..vec_len) |i| {
            if (cp_iter.next()) |cp| {
                v1[i] = cp.code;
            } else {
                cp_iter.i = saved_cp_i;
                break :outer;
            }
        }
        const v2: Vec = @splat(255); // 0xFF is the last Latin-1 code point.
        if (@reduce(.Or, v1 > v2)) return false;
    }

    return while (cp_iter.next()) |cp| {
        if (cp.code > 255) break false;
    } else true;
}

test "isLatin1Only" {
    const latin1_only = "Hello, World! \u{fe} \u{ff}";
    try testing.expect(isLatin1Only(latin1_only));
    const not_latin1_only = "Héllo, World! \u{3d3}";
    try testing.expect(!isLatin1Only(not_latin1_only));
}
164
deps/zg/src/PropsData.zig
vendored
Normal file
@@ -0,0 +1,164 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

allocator: mem.Allocator,
core_s1: []u16 = undefined,
core_s2: []u8 = undefined,
props_s1: []u16 = undefined,
props_s2: []u8 = undefined,
num_s1: []u16 = undefined,
num_s2: []u8 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();

    // Process DerivedCoreProperties.txt
    const core_bytes = @embedFile("core_props");
    var core_fbs = std.io.fixedBufferStream(core_bytes);
    var core_decomp = decompressor(.raw, core_fbs.reader());
    var core_reader = core_decomp.reader();

    var self = Self{ .allocator = allocator };

    const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
    self.core_s1 = try allocator.alloc(u16, core_stage_1_len);
    errdefer allocator.free(self.core_s1);
    for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian);

    const core_stage_2_len: u16 = try core_reader.readInt(u16, endian);
    self.core_s2 = try allocator.alloc(u8, core_stage_2_len);
    errdefer allocator.free(self.core_s2);
    _ = try core_reader.readAll(self.core_s2);

    // Process PropList.txt
    const props_bytes = @embedFile("props");
    var props_fbs = std.io.fixedBufferStream(props_bytes);
    var props_decomp = decompressor(.raw, props_fbs.reader());
    var props_reader = props_decomp.reader();

    const stage_1_len: u16 = try props_reader.readInt(u16, endian);
    self.props_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.props_s1);
    for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian);

    const stage_2_len: u16 = try props_reader.readInt(u16, endian);
    self.props_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.props_s2);
    _ = try props_reader.readAll(self.props_s2);

    // Process DerivedNumericType.txt
    const num_bytes = @embedFile("numeric");
    var num_fbs = std.io.fixedBufferStream(num_bytes);
    var num_decomp = decompressor(.raw, num_fbs.reader());
    var num_reader = num_decomp.reader();

    const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
    self.num_s1 = try allocator.alloc(u16, num_stage_1_len);
    errdefer allocator.free(self.num_s1);
    for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian);

    const num_stage_2_len: u16 = try num_reader.readInt(u16, endian);
    self.num_s2 = try allocator.alloc(u8, num_stage_2_len);
    errdefer allocator.free(self.num_s2);
    _ = try num_reader.readAll(self.num_s2);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.core_s1);
    self.allocator.free(self.core_s2);
    self.allocator.free(self.props_s1);
    self.allocator.free(self.props_s2);
    self.allocator.free(self.num_s1);
    self.allocator.free(self.num_s2);
}

/// True if `cp` is a mathematical symbol.
pub fn isMath(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}

/// True if `cp` is an alphabetic character.
pub fn isAlphabetic(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}

/// True if `cp` is a valid identifier start character.
pub fn isIdStart(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

/// True if `cp` is a valid identifier continuation character.
pub fn isIdContinue(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
}

/// True if `cp` is a valid extended identifier start character.
pub fn isXidStart(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
}

/// True if `cp` is a valid extended identifier continuation character.
pub fn isXidContinue(self: Self, cp: u21) bool {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
}

/// True if `cp` is a whitespace character.
pub fn isWhitespace(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}

/// True if `cp` is a hexadecimal digit.
pub fn isHexDigit(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}

/// True if `cp` is a diacritic mark.
pub fn isDiacritic(self: Self, cp: u21) bool {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

/// True if `cp` is numeric.
pub fn isNumeric(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
}

/// True if `cp` is a digit.
pub fn isDigit(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
}

/// True if `cp` is decimal.
pub fn isDecimal(self: Self, cp: u21) bool {
    return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
}

test "Props" {
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expect(self.isHexDigit('F'));
    try testing.expect(self.isHexDigit('a'));
    try testing.expect(self.isHexDigit('8'));
    try testing.expect(!self.isHexDigit('z'));

    try testing.expect(self.isDiacritic('\u{301}'));
    try testing.expect(self.isAlphabetic('A'));
    try testing.expect(!self.isAlphabetic('3'));
    try testing.expect(self.isMath('+'));

    try testing.expect(self.isNumeric('\u{277f}'));
    try testing.expect(self.isDigit('\u{2070}'));
    try testing.expect(self.isDecimal('3'));

    try testing.expect(!self.isNumeric('1'));
    try testing.expect(!self.isDigit('2'));
    try testing.expect(!self.isDecimal('g'));
}
228
deps/zg/src/ScriptsData.zig
vendored
Normal file
@@ -0,0 +1,228 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

/// Scripts
pub const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};

allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u8 = undefined,
s3: []u8 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(self.s2);
    _ = try reader.readAll(self.s2);

    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    _ = try reader.readAll(self.s3);

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.allocator.free(self.s3);
}

/// Lookup the Script type for `cp`.
pub fn script(self: Self, cp: u21) ?Script {
    const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]];
    if (byte == 0) return null;
    return @enumFromInt(byte);
}

test "script" {
    const self = try init(std.testing.allocator);
    defer self.deinit();
    try testing.expectEqual(Script.Latin, self.script('A').?);
}
84
deps/zg/src/WidthData.zig
vendored
Normal file
@@ -0,0 +1,84 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

const GraphemeData = @import("GraphemeData");

allocator: mem.Allocator,
g_data: GraphemeData,
s1: []u16 = undefined,
s2: []i3 = undefined,

const Self = @This();

pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .g_data = try GraphemeData.init(allocator),
    };
    errdefer self.g_data.deinit();

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(i3, stage_2_len);
    errdefer allocator.free(self.s2);
    for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian));

    return self;
}

pub fn deinit(self: *const Self) void {
    self.allocator.free(self.s1);
    self.allocator.free(self.s2);
    self.g_data.deinit();
}

/// codePointWidth returns the number of cells `cp` requires when rendered
/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to
/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1
/// control codes return 0. Ambiguous code points return 1.
pub fn codePointWidth(self: Self, cp: u21) i3 {
    return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
}

test "codePointWidth" {
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0000)); // null
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x8)); // \b
    try testing.expectEqual(@as(i3, -1), self.codePointWidth(0x7f)); // DEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0005)); // Cf
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x0007)); // \a BEL
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000A)); // \n LF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000B)); // \v VT
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000C)); // \f FF
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000D)); // \r CR
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000E)); // SO
    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x000F)); // SI

    try testing.expectEqual(@as(i3, 0), self.codePointWidth(0x070F)); // Cf
    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x0603)); // Cf Arabic

    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00AD)); // soft-hyphen
    try testing.expectEqual(@as(i3, 2), self.codePointWidth(0x2E3A)); // two-em dash
    try testing.expectEqual(@as(i3, 3), self.codePointWidth(0x2E3B)); // three-em dash

    try testing.expectEqual(@as(i3, 1), self.codePointWidth(0x00BD)); // ambiguous halfwidth

    try testing.expectEqual(@as(i3, 1), self.codePointWidth('é'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('😊'));
    try testing.expectEqual(@as(i3, 2), self.codePointWidth('统'));
}
33
deps/zg/src/ascii.zig
vendored
Normal file
@@ -0,0 +1,33 @@
const std = @import("std");
const simd = std.simd;
const testing = std.testing;

/// Returns true if `str` only contains ASCII bytes. Uses SIMD if possible.
pub fn isAsciiOnly(str: []const u8) bool {
    const vec_len = simd.suggestVectorLength(u8) orelse return for (str) |b| {
        if (b > 127) break false;
    } else true;

    const Vec = @Vector(vec_len, u8);
    var remaining = str;

    while (true) {
        if (remaining.len < vec_len) return for (remaining) |b| {
            if (b > 127) break false;
        } else true;

        const v1 = remaining[0..vec_len].*;
        const v2: Vec = @splat(127);
        if (@reduce(.Or, v1 > v2)) return false;
        remaining = remaining[vec_len..];
    }

    return true;
}

test "isAsciiOnly" {
    const ascii_only = "Hello, World! 0123456789 !@#$%^&*()_-=+";
    try testing.expect(isAsciiOnly(ascii_only));
    const not_ascii_only = "Héllo, World! 0123456789 !@#$%^&*()_-=+";
    try testing.expect(!isAsciiOnly(not_ascii_only));
}
118
deps/zg/src/code_point.zig
vendored
Normal file
@@ -0,0 +1,118 @@
const std = @import("std");

/// `CodePoint` represents a Unicode code point by its code,
/// length, and offset in the source bytes.
pub const CodePoint = struct {
    code: u21,
    len: u3,
    offset: u32,
};

/// Given a slice of a string, decode the code point at its start.
pub fn decode(bytes: []const u8, offset: u32) ?CodePoint {
    // EOS fast path
    if (bytes.len == 0) {
        return null;
    }

    // ASCII fast path
    if (bytes[0] < 128) {
        return .{
            .code = bytes[0],
            .len = 1,
            .offset = offset,
        };
    }

    var cp = CodePoint{
        .code = undefined,
        .len = switch (bytes[0]) {
            0b1100_0000...0b1101_1111 => 2,
            0b1110_0000...0b1110_1111 => 3,
            0b1111_0000...0b1111_0111 => 4,
            else => {
                // Unicode replacement code point.
                return .{
                    .code = 0xfffd,
                    .len = 1,
                    .offset = offset,
                };
            },
        },
        .offset = offset,
    };

    // Return the replacement code point if we don't have a complete
    // code point remaining. Consumes only one byte.
    if (cp.len > bytes.len) {
        // Unicode replacement code point.
        return .{
            .code = 0xfffd,
            .len = 1,
            .offset = offset,
        };
    }

    const cp_bytes = bytes[0..cp.len];
    cp.code = switch (cp.len) {
        2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),

        3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
            (cp_bytes[1] & 0b00111111)) << 6) |
            (cp_bytes[2] & 0b00111111),

        4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
            (cp_bytes[1] & 0b00111111)) << 6) |
            (cp_bytes[2] & 0b00111111)) << 6) |
            (cp_bytes[3] & 0b00111111),

        else => @panic("CodePointIterator.next invalid code point length."),
    };

    return cp;
}
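
// Worked example (added; not part of the vendored file): 'é' is the two-byte
// sequence 0xC3 0xA9; the lead byte selects len == 2, and the payload bits
// reassemble to U+00E9.
test "decode two-byte sequence" {
    const cp = decode("\xc3\xa9", 0).?;
    try std.testing.expectEqual(@as(u21, 0xE9), cp.code);
    try std.testing.expectEqual(@as(u3, 2), cp.len);
}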

/// `Iterator` iterates a string one `CodePoint` at-a-time.
pub const Iterator = struct {
    bytes: []const u8,
    i: u32 = 0,

    pub fn next(self: *Iterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;

        const res = decode(self.bytes[self.i..], self.i);
        if (res) |cp| {
            self.i += cp.len;
        }

        return res;
    }

    pub fn peek(self: *Iterator) ?CodePoint {
        const saved_i = self.i;
        defer self.i = saved_i;
        return self.next();
    }
};

test "decode" {
    const bytes = "🌩️";
    const res = decode(bytes, 0);

    if (res) |cp| {
        try std.testing.expectEqual(@as(u21, 0x1F329), cp.code);
        try std.testing.expectEqual(4, cp.len);
    } else {
        // shouldn't have failed to return
        try std.testing.expect(false);
    }
}

test "peek" {
    var iter = Iterator{ .bytes = "Hi" };

    try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code);
    try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code);
    try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code);
    try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
    try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
}
258
deps/zg/src/grapheme.zig
vendored
Normal file
@@ -0,0 +1,258 @@
const std = @import("std");
const mem = std.mem;
const unicode = std.unicode;

const CodePoint = @import("code_point").CodePoint;
const CodePointIterator = @import("code_point").Iterator;
pub const GraphemeData = @import("GraphemeData");

/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
pub const Grapheme = struct {
    len: u8,
    offset: u32,

    /// `bytes` returns the slice of bytes that correspond to
    /// this grapheme cluster in `src`.
    pub fn bytes(self: Grapheme, src: []const u8) []const u8 {
        return src[self.offset..][0..self.len];
    }
};
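
// Illustrative test (added; not part of the vendored file): `len` and
// `offset` index into the original source bytes, so `bytes` is just a slice.
test "Grapheme.bytes" {
    const g = Grapheme{ .len = 4, .offset = 3 };
    try std.testing.expectEqualStrings("😊", g.bytes("Hi 😊"));
}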

/// `Iterator` iterates a string of UTF-8 encoded bytes one grapheme cluster at-a-time.
pub const Iterator = struct {
    buf: [2]?CodePoint = .{ null, null },
    cp_iter: CodePointIterator,
    data: *const GraphemeData,

    const Self = @This();

    /// Assumes `src` is valid UTF-8.
    pub fn init(str: []const u8, data: *const GraphemeData) Self {
        var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
        self.advance();
        return self;
    }

    fn advance(self: *Self) void {
        self.buf[0] = self.buf[1];
        self.buf[1] = self.cp_iter.next();
    }

    pub fn next(self: *Self) ?Grapheme {
        self.advance();

        // If no more
        if (self.buf[0] == null) return null;
        // If last one
        if (self.buf[1] == null) return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        // If ASCII
        if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) {
            return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
        }

        const gc_start = self.buf[0].?.offset;
        var gc_len: u8 = self.buf[0].?.len;
        var state = State{};

        if (graphemeBreak(
            self.buf[0].?.code,
            self.buf[1].?.code,
            self.data,
            &state,
        )) return Grapheme{ .len = gc_len, .offset = gc_start };

        while (true) {
            self.advance();
            if (self.buf[0] == null) break;

            gc_len += self.buf[0].?.len;

            if (graphemeBreak(
                self.buf[0].?.code,
                if (self.buf[1]) |ncp| ncp.code else 0,
                self.data,
                &state,
            )) break;
        }

        return Grapheme{ .len = gc_len, .offset = gc_start };
    }
};

// Predicates
fn isBreaker(cp: u21, data: *const GraphemeData) bool {
    // Extract relevant properties.
    const cp_gbp_prop = data.gbp(cp);
    return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
}

// Grapheme break state.
pub const State = struct {
    bits: u3 = 0,

    // Extended Pictographic (emoji)
    fn hasXpic(self: State) bool {
        return self.bits & 1 == 1;
    }
    fn setXpic(self: *State) void {
        self.bits |= 1;
    }
    fn unsetXpic(self: *State) void {
        self.bits ^= 1;
    }

    // Regional Indicator (flags)
    fn hasRegional(self: State) bool {
        return self.bits & 2 == 2;
    }
    fn setRegional(self: *State) void {
        self.bits |= 2;
    }
    fn unsetRegional(self: *State) void {
        self.bits ^= 2;
    }

    // Indic Conjunct
    fn hasIndic(self: State) bool {
        return self.bits & 4 == 4;
    }
    fn setIndic(self: *State) void {
        self.bits |= 4;
    }
    fn unsetIndic(self: *State) void {
        self.bits ^= 4;
    }
};
|
||||
|
||||
/// `graphemeBreak` returns true only if a grapheme break point is required
/// between `cp1` and `cp2`. `state` should start out as 0. If calling
/// iteratively over a sequence of code points, this function must be called
/// IN ORDER on ALL potential breaks in a string.
/// Modeled after the API of utf8proc's `utf8proc_grapheme_break_stateful`.
/// https://github.com/JuliaStrings/utf8proc/blob/2bbb1ba932f727aad1fab14fafdbc89ff9dc4604/utf8proc.h#L599-L617
pub fn graphemeBreak(
    cp1: u21,
    cp2: u21,
    data: *const GraphemeData,
    state: *State,
) bool {
    // Extract relevant properties.
    const cp1_gbp_prop = data.gbp(cp1);
    const cp1_indic_prop = data.indic(cp1);
    const cp1_is_emoji = data.isEmoji(cp1);

    const cp2_gbp_prop = data.gbp(cp2);
    const cp2_indic_prop = data.indic(cp2);
    const cp2_is_emoji = data.isEmoji(cp2);

    // GB11: Emoji Extend* ZWJ x Emoji
    if (!state.hasXpic() and cp1_is_emoji) state.setXpic();
    // GB9c: Indic Conjunct Break
    if (!state.hasIndic() and cp1_indic_prop == .Consonant) state.setIndic();

    // GB3: CR x LF
    if (cp1 == '\r' and cp2 == '\n') return false;

    // GB4: Control
    if (isBreaker(cp1, data)) return true;

    // GB11: Emoji Extend* ZWJ x Emoji
    if (state.hasXpic() and
        cp1_gbp_prop == .ZWJ and
        cp2_is_emoji)
    {
        state.unsetXpic();
        return false;
    }

    // GB9: x (Extend | ZWJ)
    if (cp2_gbp_prop == .Extend or cp2_gbp_prop == .ZWJ) return false;

    // GB9a: x SpacingMark
    if (cp2_gbp_prop == .SpacingMark) return false;

    // GB9b: Prepend x
    if (cp1_gbp_prop == .Prepend and !isBreaker(cp2, data)) return false;

    // GB12, GB13: RI x RI
    if (cp1_gbp_prop == .Regional_Indicator and cp2_gbp_prop == .Regional_Indicator) {
        if (state.hasRegional()) {
            state.unsetRegional();
            return true;
        } else {
            state.setRegional();
            return false;
        }
    }

    // GB6: Hangul L x (L | V | LV | LVT)
    if (cp1_gbp_prop == .L) {
        if (cp2_gbp_prop == .L or
            cp2_gbp_prop == .V or
            cp2_gbp_prop == .LV or
            cp2_gbp_prop == .LVT) return false;
    }

    // GB7: Hangul (LV | V) x (V | T)
    if (cp1_gbp_prop == .LV or cp1_gbp_prop == .V) {
        if (cp2_gbp_prop == .V or
            cp2_gbp_prop == .T) return false;
    }

    // GB8: Hangul (LVT | T) x T
    if (cp1_gbp_prop == .LVT or cp1_gbp_prop == .T) {
        if (cp2_gbp_prop == .T) return false;
    }

    // GB9c: Indic Conjunct Break
    if (state.hasIndic() and
        cp1_indic_prop == .Consonant and
        (cp2_indic_prop == .Extend or cp2_indic_prop == .Linker))
    {
        return false;
    }

    if (state.hasIndic() and
        cp1_indic_prop == .Extend and
        cp2_indic_prop == .Linker)
    {
        return false;
    }

    if (state.hasIndic() and
        (cp1_indic_prop == .Linker or cp1_gbp_prop == .ZWJ) and
        cp2_indic_prop == .Consonant)
    {
        state.unsetIndic();
        return false;
    }

    return true;
}

test "Segmentation ZWJ and ZWSP emoji sequences" {
    const seq_1 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const seq_2 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
    const with_zwj = seq_1 ++ "\u{200D}" ++ seq_2;
    const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
    const no_joiner = seq_1 ++ seq_2;

    const data = try GraphemeData.init(std.testing.allocator);
    defer data.deinit();

    var iter = Iterator.init(with_zwj, &data);

    var i: usize = 0;
    while (iter.next()) |_| : (i += 1) {}
    try std.testing.expectEqual(@as(usize, 1), i);

    iter = Iterator.init(with_zwsp, &data);
    i = 0;
    while (iter.next()) |_| : (i += 1) {}
    try std.testing.expectEqual(@as(usize, 3), i);

    iter = Iterator.init(no_joiner, &data);
    i = 0;
    while (iter.next()) |_| : (i += 1) {}
    try std.testing.expectEqual(@as(usize, 2), i);
}
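For orientation, here is a minimal sketch of how the `Iterator` above is typically driven, mirroring the patterns in the test code in this commit; the emoji string and expected count are illustrative assumptions, not taken from the vendored tests:

```zig
const std = @import("std");
const grapheme = @import("grapheme");

test "grapheme cluster counting sketch" {
    // Load the grapheme break data tables (freed on deinit).
    const data = try grapheme.GraphemeData.init(std.testing.allocator);
    defer data.deinit();

    // Man-facepalming emoji with skin tone: five code points, one grapheme cluster.
    const str = "\u{1F926}\u{1F3FC}\u{200D}\u{2642}\u{FE0F}";
    var iter = grapheme.Iterator.init(str, &data);

    var count: usize = 0;
    while (iter.next()) |gc| : (count += 1) {
        // Each Grapheme is just an (offset, len) pair into the source bytes.
        try std.testing.expect(gc.bytes(str).len > 0);
    }
    try std.testing.expectEqual(@as(usize, 1), count);
}
```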
195
deps/zg/src/unicode_tests.zig
vendored
Normal file
@@ -0,0 +1,195 @@
const std = @import("std");
const fmt = std.fmt;
const fs = std.fs;
const io = std.io;
const heap = std.heap;
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;

const Grapheme = @import("grapheme").Grapheme;
const GraphemeData = @import("grapheme").GraphemeData;
const GraphemeIterator = @import("grapheme").Iterator;
const Normalize = @import("Normalize");

test "Unicode normalization tests" {
    var arena = heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    var allocator = arena.allocator();

    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    const n = Normalize{ .norm_data = &norm_data };

    var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
    defer file.close();
    var buf_reader = io.bufferedReader(file.reader());
    const input_stream = buf_reader.reader();

    var line_no: usize = 0;
    var buf: [4096]u8 = undefined;
    var cp_buf: [4]u8 = undefined;

    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        line_no += 1;
        // Skip comments or empty lines.
        if (line.len == 0 or line[0] == '#' or line[0] == '@') continue;
        // Iterate over fields.
        var fields = mem.split(u8, line, ";");
        var field_index: usize = 0;
        var input: []u8 = undefined;
        defer allocator.free(input);

        while (fields.next()) |field| : (field_index += 1) {
            if (field_index == 0) {
                var i_buf = std.ArrayList(u8).init(allocator);
                defer i_buf.deinit();

                var i_fields = mem.split(u8, field, " ");
                while (i_fields.next()) |s| {
                    const icp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(icp, &cp_buf);
                    try i_buf.appendSlice(cp_buf[0..len]);
                }

                input = try i_buf.toOwnedSlice();
            } else if (field_index == 1) {
                //debug.print("\n*** {s} ***\n", .{line});
                // NFC, time to test.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();

                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }

                const want = w_buf.items;
                var got = try n.nfc(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 2) {
                // NFD, time to test.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();

                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }

                const want = w_buf.items;
                var got = try n.nfd(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 3) {
                // NFKC, time to test.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();

                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }

                const want = w_buf.items;
                var got = try n.nfkc(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else if (field_index == 4) {
                // NFKD, time to test.
                var w_buf = std.ArrayList(u8).init(allocator);
                defer w_buf.deinit();

                var w_fields = mem.split(u8, field, " ");
                while (w_fields.next()) |s| {
                    const wcp = try fmt.parseInt(u21, s, 16);
                    const len = try unicode.utf8Encode(wcp, &cp_buf);
                    try w_buf.appendSlice(cp_buf[0..len]);
                }

                const want = w_buf.items;
                const got = try n.nfkd(allocator, input);
                defer got.deinit();

                try testing.expectEqualStrings(want, got.slice);
            } else {
                continue;
            }
        }
    }
}

test "Segmentation GraphemeIterator" {
|
||||
const allocator = std.testing.allocator;
|
||||
var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
|
||||
defer file.close();
|
||||
var buf_reader = std.io.bufferedReader(file.reader());
|
||||
var input_stream = buf_reader.reader();
|
||||
|
||||
const data = try GraphemeData.init(allocator);
|
||||
defer data.deinit();
|
||||
|
||||
var buf: [4096]u8 = undefined;
|
||||
var line_no: usize = 1;
|
||||
|
||||
while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) {
|
||||
// Skip comments or empty lines.
|
||||
if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue;
|
||||
|
||||
// Clean up.
|
||||
var line = std.mem.trimLeft(u8, raw, "÷ ");
|
||||
if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| {
|
||||
line = line[0..octo];
|
||||
}
|
||||
// Iterate over fields.
|
||||
var want = std.ArrayList(Grapheme).init(allocator);
|
||||
defer want.deinit();
|
||||
|
||||
var all_bytes = std.ArrayList(u8).init(allocator);
|
||||
defer all_bytes.deinit();
|
||||
|
||||
var graphemes = std.mem.split(u8, line, " ÷ ");
|
||||
var bytes_index: u32 = 0;
|
||||
|
||||
while (graphemes.next()) |field| {
|
||||
var code_points = std.mem.split(u8, field, " ");
|
||||
var cp_buf: [4]u8 = undefined;
|
||||
var cp_index: u32 = 0;
|
||||
var gc_len: u8 = 0;
|
||||
|
||||
while (code_points.next()) |code_point| {
|
||||
if (std.mem.eql(u8, code_point, "×")) continue;
|
||||
const cp: u21 = try std.fmt.parseInt(u21, code_point, 16);
|
||||
const len = try unicode.utf8Encode(cp, &cp_buf);
|
||||
try all_bytes.appendSlice(cp_buf[0..len]);
|
||||
cp_index += len;
|
||||
gc_len += len;
|
||||
}
|
||||
|
||||
try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
|
||||
bytes_index += cp_index;
|
||||
}
|
||||
|
||||
// std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items });
|
||||
var iter = GraphemeIterator.init(all_bytes.items, &data);
|
||||
|
||||
// Chaeck.
|
||||
for (want.items) |want_gc| {
|
||||
const got_gc = (iter.next()).?;
|
||||
try std.testing.expectEqualStrings(
|
||||
want_gc.bytes(all_bytes.items),
|
||||
got_gc.bytes(all_bytes.items),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
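For orientation, here is a minimal sketch of the `Normalize` API that the normalization test above exercises; it assumes `NormData` has a matching `deinit` for use outside an arena, and the combining-accent input is an illustrative choice, not taken from the test data:

```zig
const std = @import("std");
const Normalize = @import("Normalize");

test "NFC composes combining marks" {
    const allocator = std.testing.allocator;

    // Load the normalization data tables.
    // Assumption: NormData.deinit exists; the vendored test relies on an arena instead.
    var norm_data: Normalize.NormData = undefined;
    try Normalize.NormData.init(&norm_data, allocator);
    defer norm_data.deinit();

    const n = Normalize{ .norm_data = &norm_data };

    // "e" + U+0301 COMBINING ACUTE ACCENT composes to precomposed "é" under NFC.
    var got = try n.nfc(allocator, "e\u{301}");
    defer got.deinit();

    try std.testing.expectEqualStrings("é", got.slice);
}
```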
92
deps/zg/unicode_license/UnicodeLicenseAgreement.html
vendored
Normal file
@@ -0,0 +1,92 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<!-- saved from url=(0036)https://www.unicode.org/license.html -->
<html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">


<meta http-equiv="Content-Language" content="en-us">
<meta name="keywords" content="Unicode Standard, copyright">
<meta name="ProgId" content="FrontPage.Editor.Document">
<title>Unicode License Agreement</title>
<link rel="stylesheet" type="text/css" href="standard_styles.css">

<style type="text/css">
pre {
    FONT-FAMILY: Arial, Geneva, sans-serif;
}
</style>


</head>


<body text="#330000">

<table width="100%" cellpadding="0" cellspacing="0" border="0">
<tbody><tr>

<!-- BEGIN CONTENTS -->

<td>


<blockquote>
<h2><a name="License">UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE</a></h2>

<pre>See <a href="https://www.unicode.org/copyright.html">Terms of Use</a> for definitions of Unicode Inc.'s
Data Files and Software.

NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.

COPYRIGHT AND PERMISSION NOTICE

Copyright © 1991-2021 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
</pre>

</blockquote>



</td>

</tr>

</tbody></table>


</body></html>
218
deps/zg/unicode_license/standard_style.css
vendored
Normal file
@@ -0,0 +1,218 @@
BODY {
    margin: 0; COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
div.body { margin: 1em}
P {
    COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
TD {
    COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe
}
LI {
    COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #fffffe;
    margin-top: 0.25em; margin-bottom: 0.25em
}
A:link {
    COLOR: #CC0000; text-decoration:none;
}
A:visited {
    COLOR: #880000; text-decoration:none;
}
A:active {
    COLOR: green
}
A:hover {
    text-decoration:underline;
}
H1 {
    FONT-WEIGHT: bold
}
H2 {
    FONT-WEIGHT: bold
}
H3 {
    FONT-WEIGHT: bold
}
H4 {
    FONT-WEIGHT: bold
}
H1 {
    MARGIN-TOP: 12px; FONT-SIZE: 180%; FONT-FAMILY: Arial, Geneva, sans-serif; TEXT-ALIGN: center
}
H2 {
    MARGIN-TOP: 2em; FONT-SIZE: 120%; LINE-HEIGHT: 100%; FONT-FAMILY: Arial, Geneva, sans-serif
}
H3 {
    MARGIN-TOP: 2em; FONT-SIZE: 105%; FONT-FAMILY: Arial, Geneva, sans-serif
}
H4 {
    MARGIN-TOP: 2em; FONT-SIZE: 95%; FONT-FAMILY: Arial, Geneva, sans-serif
}
TD.bar {
    PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: right
}
P.bar {
    PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: right
}
TD.icon {
    PADDING-RIGHT: 2px; PADDING-LEFT: 2px; FONT-WEIGHT: bold; FONT-SIZE: 100%; PADDING-BOTTOM: 2px; COLOR: #fffffe; PADDING-TOP: 2px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: left
}
TD.gray {
    PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-SIZE: 50%; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; BACKGROUND-COLOR: #999999
}
A.bar {
    FONT-SIZE: 100%
}
A.bar:link {
    FONT-SIZE: 90%; COLOR: #fffffe
}
A.bar:visited {
    FONT-SIZE: 90%; COLOR: #fffffe
}
A.bar:active {
    FONT-SIZE: 90%; COLOR: #ff3333
}
A.bar:hover {
    FONT-SIZE: 90%; COLOR: #ff3333
}
TD.navCol {
    PADDING-RIGHT: 4px; PADDING-LEFT: 4px; PADDING-BOTTOM: 4px; COLOR: #330000; PADDING-TOP: 4px; BACKGROUND-COLOR: #f0e0c0
}
TABLE.navColTable {
    COLOR: #330000; BACKGROUND-COLOR: #f0e0c0
}
TD.navColTitle {
    PADDING-RIGHT: 4px; PADDING-LEFT: 4px; FONT-WEIGHT: bold; FONT-SIZE: 95%; PADDING-BOTTOM: 4px; COLOR: #fffffe; PADDING-TOP: 4px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000; TEXT-ALIGN: center
}
TD.navColCell {
    FONT-SIZE: 90%; BACKGROUND-COLOR: #f0e0c0
}
TD.currentPage {
    FONT-WEIGHT: bold; FONT-SIZE: 90%; FONT-STYLE: italic; BACKGROUND-COLOR: #f0e0c0
}
TD.contents {

}
TABLE.sidebar {
    PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FLOAT: right; PADDING-BOTTOM: 0px; MARGIN: 4px; MARGIN-LEFT: 1em; WIDTH: 40%; COLOR: #000000; PADDING-TOP: 0px; BACKGROUND-COLOR: #990000
}
TD.sidebarTitle {
    PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-WEIGHT: bold; FONT-SIZE: 95%; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; FONT-FAMILY: Arial, Geneva, sans-serif; BACKGROUND-COLOR: #990000
}
TD.sidebar {
    PADDING-RIGHT: 2px; PADDING-LEFT: 2px; FONT-SIZE: 90%; PADDING-BOTTOM: 2px; COLOR: #000000; PADDING-TOP: 2px; BACKGROUND-COLOR: #f0e0c0
}
P.q {
    FONT-WEIGHT: bold; FONT-STYLE: italic; FONT-FAMILY: 'Century Schoolbook', serif
}
P.a {
    MARGIN-LEFT: 16px; MARGIN-RIGHT: 16px; FONT-FAMILY: 'Century Schoolbook', serif
}
P.source {
    FONT-SIZE: 90%; MARGIN-RIGHT: 16px; FONT-FAMILY: 'Century Schoolbook', serif; TEXT-ALIGN: right
}
UNKNOWN {
    COLOR: #ffffee
}
TABLE.light {
    COLOR: #330000; BACKGROUND-COLOR: #ffffee
}
TD.dark {
    FONT-SIZE: 200%; COLOR: #ffffee; FONT-FAMILY: 'Century Schoolbook', serif; BACKGROUND-COLOR: #aa0000
}
H1 {
    FONT-SIZE: 150%; MARGIN: 2px 0px; LINE-HEIGHT: 100%; FONT-FAMILY: Arial, Geneva, sans-serif; TEXT-ALIGN: center
}
H4 {
    COLOR: #000000; FONT-FAMILY: Arial, Geneva, sans-serif; FONT-WEIGHT: bold; FONT-SIZE: 95%; BACKGROUND-COLOR: #fffffe;
    margin-bottom:-15px
}
TD.head {
    PADDING-RIGHT: 0px; PADDING-LEFT: 0px; FONT-SIZE: 100%; FONT-WEIGHT: bold; PADDING-BOTTOM: 0px; COLOR: #fffffe; PADDING-TOP: 0px; BACKGROUND-COLOR: #999999
}
UL.one {
    FONT-SIZE: 90%; MARGIN-LEFT:20px; TEXT-DECORATION: none; LIST-STYLE-TYPE: none
}
UL.two {
    FONT-SIZE: 85%; TEXT-DECORATION: none; TEXT-ALIGN: left; LIST-STYLE-TYPE: none; MARGIN-LEFT:20px
}

span.changedspan { background-color: #FFFF00; border-style: dotted; border-width: 1px }
span.removedspan { text-decoration: line-through; background-color: #FFFF00; border-style: dotted; border-width: 1px }

table.simple { border-width:1px; border-style:solid; border-color:#A0A0A0;
    border-collapse:collapse; padding:0.2em; font-size:1em}
table.simple th { border-width:1px; border-style:solid; border-color:#A0A0A0;
    font-weight:bold; padding:5px; text-align: left; }
table.simple td {border-width:1px; border-style:solid; border-color:#A0A0A0;
    padding:5px; text-align: left; }


table.subtle { border-width:1px; border-style:solid; border-color:#A0A0A0;
    border-collapse:collapse; padding:0.2em; font-size:1em}
table.subtle th { border-width:1px; border-style:solid; border-color:#A0A0A0;
    font-weight:bold; padding:5px; text-align: left; }
table.subtle td {border-width:1px; border-style:solid; border-color:#A0A0A0;
    padding:5px; text-align: left; }


table.subtle-nb { border-style:none; border-width:0; border-collapse:collapse; }
table.subtle-nb th { border:solid 1px #F2F2F2; font-weight:bold; padding:5px; text-align:left; }
table.subtle-nb td { border-style:none; font-weight:normal; padding:5px; text-align:left; }

table.subtle-nb table.subtle th { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle-nb table.subtle td { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle-nb table.simple th { color: #000000; background-color:#FFFFFF; border-width:1px; border-style:solid;
    border-color:#A0A0A0; }
table.subtle-nb table.simple td { border-width:1px; border-style:solid; border-color:#A0A0A0; }
table.subtle table.simple th { color: #000000; background-color:#FFFFFF; }

table.subtle th { color: #606060; background-color:#FAFAFA;}
table.subtle-nb th { color: #808080; background-color:#F8F8F8;}

table.subtle th p { color: #808080; background-color:#F8F8F8; }
table.subtle-nb tr th p { color: #808080; background-color:#F8F8F8; }

table.simple th p { margin:0; }
table.subtle th p { margin:0; }
table.subtle-nb th p { margin:0; }


/* first-child selector only works in IE if DOCTYPE has a URL (standards mode) */
/* the following remove space above first and below last paragraph (or list) inside a table cell, but preserve interparagraph spacing */

table.simple td>p:first-child { margin: 0; }
table.simple td>p { margin-top: 1.5em; }

table.subtle td>p:first-child { margin:0; }
table.subtle td>p { margin-top:1.5em; }

table.subtle-nb td>p:first-child { margin:0; }
table.subtle-nb td>p { margin-top:1.5em; }

table.simple td>ul:first-child { margin-top:0; margin-bottom:0; }
table.simple td>ol:first-child { margin-top:0; margin-bottom:0; }
table.simple td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.simple td>ol { margin-top:1.5em; margin-bottom:0.5em; }

table.subtle td>ul:first-child { margin-top:0; margin-bottom:0; }
table.subtle td>ol:first-child { margin-top:0; margin-bottom:0; }
table.subtle td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle td>ol { margin-top:1.5em; margin-bottom:0.5em; }

table.subtle-nb td>ul:first-child { margin-top:0; margin-bottom:0; }
table.subtle-nb td>ol:first-child { margin-top:0; margin-bottom:0; }
table.subtle-nb td>ul { margin-top:1.5em; margin-bottom:0.5em; }
table.subtle-nb td>ol { margin-top:1.5em; margin-bottom:0.5em; }


/* hanging indent, so wide navcolcell items don't look like they are two entries on wrapping */
/* the value of .6 is chosen to work with navColCells that start with a manually added bullet character */

td.navColCell:first-child { padding-left:0.6em; text-indent: -.6em; }

/* hoisted from reports.css */

.changed { background-color: #FFFF00; border-style: dotted; border-width: 1px; }
.removed { text-decoration: line-through; background-color: #FFFF00; border-style: dotted; border-width: 1px; }
.reviewnote { background-color: #FFFF80; color: #CC0000; border-style: dashed; border-width: 1px; }