init
I will never get tired of vendoring dependencies, ha ha. It is possible I am insane. I had to do a lot of pruning to keep these from being ridiculous (especially the Unicode data, which had nearly 1 million lines of... stuff).
This commit is contained in:
202
deps/zg/src/CaseData.zig
vendored
Normal file
202
deps/zg/src/CaseData.zig
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const compress = std.compress;
|
||||
const mem = std.mem;
|
||||
const testing = std.testing;
|
||||
const unicode = std.unicode;
|
||||
|
||||
const CodePointIterator = @import("code_point").Iterator;
|
||||
|
||||
const Self = @This();

/// Allocator that `init` uses to create the tables below and that `deinit`
/// uses to free them.
allocator: mem.Allocator,

/// Per-code-point case mappings, indexed by code point. Element `[0]` of an
/// entry is the uppercase mapping and element `[1]` the lowercase mapping.
case_map: [][2]u21,

/// First stage of the case-property lookup: indexed by `cp >> 8`, yields the
/// base offset into `prop_s2`. Left `undefined` until `init` fills it.
prop_s1: []u16 = undefined,

/// Second stage of the case-property lookup: indexed by the stage-one offset
/// plus `cp & 0xff`. Each byte is a bit set (1 = lowercase, 2 = uppercase,
/// 4 = cased). Left `undefined` until `init` fills it.
prop_s2: []u8 = undefined,
|
||||
|
||||
/// Builds the case-mapping and case-property tables from the embedded,
/// raw-deflate-compressed data files `upper`, `lower` and `case_prop`.
///
/// `case_map` receives one `[upper, lower]` pair for every index in
/// `0..0x110000`; each pair starts as the identity mapping and is then
/// overridden by the entries found in the `upper` and `lower` tables.
///
/// All tables are allocated with `allocator` and owned by the returned value;
/// release them with `deinit`. On failure everything allocated so far is freed.
///
/// Errors: allocation failure, or any error reported while decompressing or
/// reading the embedded data (including `error.EndOfStream` on truncated data).
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    // NOTE(review): integers are read in the target's native byte order, so the
    // embedded tables are presumably generated with matching endianness —
    // confirm against the generator when cross-compiling.
    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .case_map = try allocator.alloc([2]u21, 0x110000),
    };
    errdefer allocator.free(self.case_map);

    // Start from the identity mapping; the tables below only list exceptions.
    for (self.case_map, 0..) |*entry, i| {
        const cp: u21 = @intCast(i);
        entry.* = .{ cp, cp };
    }

    // Slot 0 holds the uppercase mapping, slot 1 the lowercase mapping.
    try loadCaseColumn(self.case_map, 0, @embedFile("upper"));
    try loadCaseColumn(self.case_map, 1, @embedFile("lower"));

    // Case properties: a u16 length followed by that many u16 stage-one
    // entries, then a u16 length followed by that many stage-two bytes.
    const cp_bytes = @embedFile("case_prop");
    var cp_fbs = std.io.fixedBufferStream(cp_bytes);
    var cp_decomp = decompressor(.raw, cp_fbs.reader());
    var cp_reader = cp_decomp.reader();

    const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (self.prop_s1) |*slot| slot.* = try cp_reader.readInt(u16, endian);

    const stage_2_len: u16 = try cp_reader.readInt(u16, endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    // `readNoEof` rather than `readAll`: a short read must be an error, otherwise
    // the unread tail of `prop_s2` would stay uninitialised and be consulted by
    // the property lookups.
    try cp_reader.readNoEof(self.prop_s2);

    return self;
}

/// Decompresses one case-mapping table and applies it to `case_map[..][column]`.
///
/// The stream is a sequence of `(code point, difference)` pairs, each an `i24`
/// in native byte order; a code point of 0 terminates the table. The mapped
/// value stored is `code point + difference`.
fn loadCaseColumn(case_map: [][2]u21, column: usize, compressed: []const u8) !void {
    const endian = builtin.cpu.arch.endian();

    var fbs = std.io.fixedBufferStream(compressed);
    var decomp = compress.flate.inflate.decompressor(.raw, fbs.reader());
    var reader = decomp.reader();

    while (true) {
        const cp = try reader.readInt(i24, endian);
        if (cp == 0) break;
        const diff = try reader.readInt(i24, endian);
        case_map[@intCast(cp)][column] = @intCast(cp + diff);
    }
}
|
||||
|
||||
/// Frees the three tables (`case_map`, `prop_s1`, `prop_s2`) with the
/// allocator stored in `self`.
pub fn deinit(self: *const Self) void {
    const gpa = self.allocator;
    gpa.free(self.prop_s2);
    gpa.free(self.prop_s1);
    gpa.free(self.case_map);
}
|
||||
|
||||
/// Reports whether `cp` is cased, i.e. upper, lower, or title case.
/// Tests bit value 4 of the two-stage property lookup for `cp`.
pub fn isCased(self: Self, cp: u21) bool {
    const base = self.prop_s1[cp >> 8];
    const props = self.prop_s2[base + (cp & 0xff)];
    return props & 4 != 0;
}
|
||||
|
||||
/// Reports whether `cp` is uppercase.
/// Tests bit value 2 of the two-stage property lookup for `cp`.
pub fn isUpper(self: Self, cp: u21) bool {
    const base = self.prop_s1[cp >> 8];
    const props = self.prop_s2[base + (cp & 0xff)];
    return props & 2 != 0;
}
|
||||
|
||||
/// Reports whether `str` is all uppercase: every cased code point in it must
/// be uppercase. Uncased code points are ignored, so a string with no cased
/// code points (including the empty string) yields true.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |item| {
        const c = item.code;
        if (!self.isCased(c)) continue;
        if (!self.isUpper(c)) return false;
    }

    return true;
}
|
||||
|
||||
test "isUpperStr" {
    const case_data = try init(testing.allocator);
    defer case_data.deinit();

    // Same three inputs in all-upper, all-lower and mixed case.
    const cases = [_]struct { text: []const u8, all_upper: bool }{
        .{ .text = "HELLO, WORLD 2112!", .all_upper = true },
        .{ .text = "hello, world 2112!", .all_upper = false },
        .{ .text = "Hello, World 2112!", .all_upper = false },
    };

    for (cases) |case| {
        try testing.expect(case_data.isUpperStr(case.text) == case.all_upper);
    }
}
|
||||
|
||||
/// Looks up the uppercase mapping of `cp`, stored in slot 0 of its
/// `case_map` entry.
pub fn toUpper(self: Self, cp: u21) u21 {
    const pair = self.case_map[cp];
    return pair[0];
}
|
||||
|
||||
/// Produces a newly allocated copy of `str` in which every code point has
/// been replaced by its `toUpper` mapping and re-encoded as UTF-8.
/// The caller owns the result and must free it with `allocator`.
/// Fails on allocation failure or if a mapped code point cannot be encoded.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    // Harmless after `toOwnedSlice`; frees the buffer on the error paths.
    defer out.deinit();

    var encoded: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |item| {
        const upper = self.toUpper(item.code);
        const n = try unicode.utf8Encode(upper, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return out.toOwnedSlice();
}
|
||||
|
||||
test "toUpperStr" {
    const case_data = try init(testing.allocator);
    defer case_data.deinit();

    const input = "Hello, World 2112!";
    const expected = "HELLO, WORLD 2112!";

    // The result is caller-owned, so free it with the allocator we passed in.
    const actual = try case_data.toUpperStr(testing.allocator, input);
    defer testing.allocator.free(actual);

    try testing.expectEqualStrings(expected, actual);
}
|
||||
|
||||
/// Reports whether `cp` is lowercase.
/// Tests bit value 1 of the two-stage property lookup for `cp`.
pub fn isLower(self: Self, cp: u21) bool {
    const base = self.prop_s1[cp >> 8];
    const props = self.prop_s2[base + (cp & 0xff)];
    return props & 1 != 0;
}
|
||||
|
||||
/// Reports whether `str` is all lowercase: every cased code point in it must
/// be lowercase. Uncased code points are ignored, so a string with no cased
/// code points (including the empty string) yields true.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |item| {
        const c = item.code;
        if (!self.isCased(c)) continue;
        if (!self.isLower(c)) return false;
    }

    return true;
}
|
||||
|
||||
test "isLowerStr" {
    const case_data = try init(testing.allocator);
    defer case_data.deinit();

    // Same three inputs in all-lower, all-upper and mixed case.
    const cases = [_]struct { text: []const u8, all_lower: bool }{
        .{ .text = "hello, world 2112!", .all_lower = true },
        .{ .text = "HELLO, WORLD 2112!", .all_lower = false },
        .{ .text = "Hello, World 2112!", .all_lower = false },
    };

    for (cases) |case| {
        try testing.expect(case_data.isLowerStr(case.text) == case.all_lower);
    }
}
|
||||
|
||||
/// Looks up the lowercase mapping of `cp`, stored in slot 1 of its
/// `case_map` entry.
pub fn toLower(self: Self, cp: u21) u21 {
    const pair = self.case_map[cp];
    return pair[1];
}
|
||||
|
||||
/// Produces a newly allocated copy of `str` in which every code point has
/// been replaced by its `toLower` mapping and re-encoded as UTF-8.
/// The caller owns the result and must free it with `allocator`.
/// Fails on allocation failure or if a mapped code point cannot be encoded.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    // Harmless after `toOwnedSlice`; frees the buffer on the error paths.
    defer out.deinit();

    var encoded: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |item| {
        const lower = self.toLower(item.code);
        const n = try unicode.utf8Encode(lower, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return out.toOwnedSlice();
}
|
||||
|
||||
test "toLowerStr" {
    const case_data = try init(testing.allocator);
    defer case_data.deinit();

    const input = "Hello, World 2112!";
    const expected = "hello, world 2112!";

    // The result is caller-owned, so free it with the allocator we passed in.
    const actual = try case_data.toLowerStr(testing.allocator, input);
    defer testing.allocator.free(actual);

    try testing.expectEqualStrings(expected, actual);
}
|
Reference in New Issue
Block a user