Files
rotint/deps/zg/src/GenCatData.zig

172 lines
4.1 KiB
Zig
Raw Normal View History

const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
/// General Category
pub const Gc = enum {
Cc, // Other, Control
Cf, // Other, Format
Cn, // Other, Unassigned
Co, // Other, Private Use
Cs, // Other, Surrogate
Ll, // Letter, Lowercase
Lm, // Letter, Modifier
Lo, // Letter, Other
Lu, // Letter, Uppercase
Lt, // Letter, Titlecase
Mc, // Mark, Spacing Combining
Me, // Mark, Enclosing
Mn, // Mark, Non-Spacing
Nd, // Number, Decimal Digit
Nl, // Number, Letter
No, // Number, Other
Pc, // Punctuation, Connector
Pd, // Punctuation, Dash
Pe, // Punctuation, Close
Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
Po, // Punctuation, Other
Ps, // Punctuation, Open
Sc, // Symbol, Currency
Sk, // Symbol, Modifier
Sm, // Symbol, Math
So, // Symbol, Other
Zl, // Separator, Line
Zp, // Separator, Paragraph
Zs, // Separator, Space
};
allocator: mem.Allocator,
s1: []u16 = undefined,
s2: []u5 = undefined,
s3: []u5 = undefined,
const Self = @This();
pub fn init(allocator: mem.Allocator) !Self {
const decompressor = compress.flate.inflate.decompressor;
const in_bytes = @embedFile("gencat");
var in_fbs = std.io.fixedBufferStream(in_bytes);
var in_decomp = decompressor(.raw, in_fbs.reader());
var reader = in_decomp.reader();
const endian = builtin.cpu.arch.endian();
var self = Self{ .allocator = allocator };
const s1_len: u16 = try reader.readInt(u16, endian);
self.s1 = try allocator.alloc(u16, s1_len);
errdefer allocator.free(self.s1);
for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
const s2_len: u16 = try reader.readInt(u16, endian);
self.s2 = try allocator.alloc(u5, s2_len);
errdefer allocator.free(self.s2);
for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
const s3_len: u16 = try reader.readInt(u8, endian);
self.s3 = try allocator.alloc(u5, s3_len);
errdefer allocator.free(self.s3);
for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian));
return self;
}
pub fn deinit(self: *const Self) void {
self.allocator.free(self.s1);
self.allocator.free(self.s2);
self.allocator.free(self.s3);
}
/// Lookup the General Category for `cp`.
pub fn gc(self: Self, cp: u21) Gc {
return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
}
/// True if `cp` has an C general category.
pub fn isControl(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Cc,
.Cf,
.Cn,
.Co,
.Cs,
=> true,
else => false,
};
}
/// True if `cp` has an L general category.
pub fn isLetter(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Ll,
.Lm,
.Lo,
.Lu,
.Lt,
=> true,
else => false,
};
}
/// True if `cp` has an M general category.
pub fn isMark(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Mc,
.Me,
.Mn,
=> true,
else => false,
};
}
/// True if `cp` has an N general category.
pub fn isNumber(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Nd,
.Nl,
.No,
=> true,
else => false,
};
}
/// True if `cp` has an P general category.
pub fn isPunctuation(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Pc,
.Pd,
.Pe,
.Pf,
.Pi,
.Po,
.Ps,
=> true,
else => false,
};
}
/// True if `cp` has an S general category.
pub fn isSymbol(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Sc,
.Sk,
.Sm,
.So,
=> true,
else => false,
};
}
/// True if `cp` has an Z general category.
pub fn isSeparator(self: Self, cp: u21) bool {
return switch (self.gc(cp)) {
.Zl,
.Zp,
.Zs,
=> true,
else => false,
};
}