mirror of
https://github.com/ziglang/zig.git
synced 2026-01-23 15:55:28 +00:00
Part of #19063. Primarily, this moves Aro from deps/ to lib/compiler/ so that it can be lazily compiled from source. src/aro_translate_c.zig is moved to lib/compiler/aro_translate_c.zig and some of the Zig CLI logic is moved to a main() function there. aro_translate_c.zig becomes the "common" import for clang-based translate-c. Not all of the compiler could be disentangled from Aro, however, so for now it is still compiled with the main compiler sources because the clang-based translate-c depends on it. Once aro-based translate-c achieves feature parity with the clang-based translate-c implementation, the clang-based one can be removed from Zig. Aro made it unnecessarily difficult to depend on with these .def files and all these Zig module requirements. I looked at the .def files and made these observations: - The canonical source is the llvm .def files. - Therefore there is an update process to sync with llvm that involves regenerating the .def files in Aro. - Therefore you might as well just regenerate the .zig files directly and check those into Aro. - Also, with a small amount of tinkering, the on-disk size of these generated .zig files can be made many times smaller, without compromising type safety in the usage of the data. This would make things much easier on Zig as a downstream project; in particular, we could remove those pesky stubs when bootstrapping. I have gone ahead with these changes since they unblock me, and I will have a chat with Vexu to see what he thinks.
1112 lines
30 KiB
Zig
1112 lines
30 KiB
Zig
//! This module provides functions for classifying characters according to
//! various C standards. All classification routines *do not* consider
//! characters from the basic character set; it is assumed those will be
//! checked separately.
//! isXidStart and isXidContinue are adapted from https://github.com/dtolnay/unicode-ident

const assert = @import("std").debug.assert;
const tables = @import("char_info/identifier_tables.zig");
/// C11 Standard Annex D
///
/// Returns true when `codepoint` lies in one of the ranges listed below,
/// i.e. the characters this module accepts inside a C11 identifier.
/// Only codepoints above 0x7F may be passed (asserted); the basic character
/// set is expected to be handled by the caller.
/// The numbered comments group the ranges; presumably they mirror the
/// numbered list in Annex D.1 of the standard.
pub fn isC11IdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        // 1
        0x00A8,
        0x00AA,
        0x00AD,
        0x00AF,
        0x00B2...0x00B5,
        0x00B7...0x00BA,
        0x00BC...0x00BE,
        0x00C0...0x00D6,
        0x00D8...0x00F6,
        0x00F8...0x00FF,

        // 2
        0x0100...0x167F,
        0x1681...0x180D,
        0x180F...0x1FFF,

        // 3
        0x200B...0x200D,
        0x202A...0x202E,
        0x203F...0x2040,
        0x2054,
        0x2060...0x206F,

        // 4
        0x2070...0x218F,
        0x2460...0x24FF,
        0x2776...0x2793,
        0x2C00...0x2DFF,
        0x2E80...0x2FFF,

        // 5
        0x3004...0x3007,
        0x3021...0x302F,
        0x3031...0x303F,

        // 6
        0x3040...0xD7FF,

        // 7
        0xF900...0xFD3D,
        0xFD40...0xFDCF,
        0xFDF0...0xFE44,
        0xFE47...0xFFFD,

        // 8 (every supplementary plane 1 through 14, minus its last two codepoints)
        0x10000...0x1FFFD,
        0x20000...0x2FFFD,
        0x30000...0x3FFFD,
        0x40000...0x4FFFD,
        0x50000...0x5FFFD,
        0x60000...0x6FFFD,
        0x70000...0x7FFFD,
        0x80000...0x8FFFD,
        0x90000...0x9FFFD,
        0xA0000...0xAFFFD,
        0xB0000...0xBFFFD,
        0xC0000...0xCFFFD,
        0xD0000...0xDFFFD,
        0xE0000...0xEFFFD,
        => true,
        else => false,
    };
}
/// C99 Standard Annex D
///
/// Returns true when `codepoint` lies in one of the ranges listed below,
/// grouped by script. Only codepoints above 0x7F may be passed (asserted);
/// the basic character set is expected to be handled by the caller.
/// The "Digits" group is also accepted here; `isC99DisallowedInitialIDChar`
/// is the routine that rejects those as the first identifier character.
pub fn isC99IdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        // Latin
        0x00AA,
        0x00BA,
        0x00C0...0x00D6,
        0x00D8...0x00F6,
        0x00F8...0x01F5,
        0x01FA...0x0217,
        0x0250...0x02A8,
        0x1E00...0x1E9B,
        0x1EA0...0x1EF9,
        0x207F,

        // Greek
        0x0386,
        0x0388...0x038A,
        0x038C,
        0x038E...0x03A1,
        0x03A3...0x03CE,
        0x03D0...0x03D6,
        0x03DA,
        0x03DC,
        0x03DE,
        0x03E0,
        0x03E2...0x03F3,
        0x1F00...0x1F15,
        0x1F18...0x1F1D,
        0x1F20...0x1F45,
        0x1F48...0x1F4D,
        0x1F50...0x1F57,
        0x1F59,
        0x1F5B,
        0x1F5D,
        0x1F5F...0x1F7D,
        0x1F80...0x1FB4,
        0x1FB6...0x1FBC,
        0x1FC2...0x1FC4,
        0x1FC6...0x1FCC,
        0x1FD0...0x1FD3,
        0x1FD6...0x1FDB,
        0x1FE0...0x1FEC,
        0x1FF2...0x1FF4,
        0x1FF6...0x1FFC,

        // Cyrillic
        0x0401...0x040C,
        0x040E...0x044F,
        0x0451...0x045C,
        0x045E...0x0481,
        0x0490...0x04C4,
        0x04C7...0x04C8,
        0x04CB...0x04CC,
        0x04D0...0x04EB,
        0x04EE...0x04F5,
        0x04F8...0x04F9,

        // Armenian
        0x0531...0x0556,
        0x0561...0x0587,

        // Hebrew
        0x05B0...0x05B9,
        0x05BB...0x05BD,
        0x05BF,
        0x05C1...0x05C2,
        0x05D0...0x05EA,
        0x05F0...0x05F2,

        // Arabic
        0x0621...0x063A,
        0x0640...0x0652,
        0x0670...0x06B7,
        0x06BA...0x06BE,
        0x06C0...0x06CE,
        0x06D0...0x06DC,
        0x06E5...0x06E8,
        0x06EA...0x06ED,

        // Devanagari
        0x0901...0x0903,
        0x0905...0x0939,
        0x093E...0x094D,
        0x0950...0x0952,
        0x0958...0x0963,

        // Bengali
        0x0981...0x0983,
        0x0985...0x098C,
        0x098F...0x0990,
        0x0993...0x09A8,
        0x09AA...0x09B0,
        0x09B2,
        0x09B6...0x09B9,
        0x09BE...0x09C4,
        0x09C7...0x09C8,
        0x09CB...0x09CD,
        0x09DC...0x09DD,
        0x09DF...0x09E3,
        0x09F0...0x09F1,

        // Gurmukhi
        0x0A02,
        0x0A05...0x0A0A,
        0x0A0F...0x0A10,
        0x0A13...0x0A28,
        0x0A2A...0x0A30,
        0x0A32...0x0A33,
        0x0A35...0x0A36,
        0x0A38...0x0A39,
        0x0A3E...0x0A42,
        0x0A47...0x0A48,
        0x0A4B...0x0A4D,
        0x0A59...0x0A5C,
        0x0A5E,
        0x0A74,

        // Gujarati
        0x0A81...0x0A83,
        0x0A85...0x0A8B,
        0x0A8D,
        0x0A8F...0x0A91,
        0x0A93...0x0AA8,
        0x0AAA...0x0AB0,
        0x0AB2...0x0AB3,
        0x0AB5...0x0AB9,
        0x0ABD...0x0AC5,
        0x0AC7...0x0AC9,
        0x0ACB...0x0ACD,
        0x0AD0,
        0x0AE0,

        // Oriya
        0x0B01...0x0B03,
        0x0B05...0x0B0C,
        0x0B0F...0x0B10,
        0x0B13...0x0B28,
        0x0B2A...0x0B30,
        0x0B32...0x0B33,
        0x0B36...0x0B39,
        0x0B3E...0x0B43,
        0x0B47...0x0B48,
        0x0B4B...0x0B4D,
        0x0B5C...0x0B5D,
        0x0B5F...0x0B61,

        // Tamil
        0x0B82...0x0B83,
        0x0B85...0x0B8A,
        0x0B8E...0x0B90,
        0x0B92...0x0B95,
        0x0B99...0x0B9A,
        0x0B9C,
        0x0B9E...0x0B9F,
        0x0BA3...0x0BA4,
        0x0BA8...0x0BAA,
        0x0BAE...0x0BB5,
        0x0BB7...0x0BB9,
        0x0BBE...0x0BC2,
        0x0BC6...0x0BC8,
        0x0BCA...0x0BCD,

        // Telugu
        0x0C01...0x0C03,
        0x0C05...0x0C0C,
        0x0C0E...0x0C10,
        0x0C12...0x0C28,
        0x0C2A...0x0C33,
        0x0C35...0x0C39,
        0x0C3E...0x0C44,
        0x0C46...0x0C48,
        0x0C4A...0x0C4D,
        0x0C60...0x0C61,

        // Kannada
        0x0C82...0x0C83,
        0x0C85...0x0C8C,
        0x0C8E...0x0C90,
        0x0C92...0x0CA8,
        0x0CAA...0x0CB3,
        0x0CB5...0x0CB9,
        0x0CBE...0x0CC4,
        0x0CC6...0x0CC8,
        0x0CCA...0x0CCD,
        0x0CDE,
        0x0CE0...0x0CE1,

        // Malayalam
        0x0D02...0x0D03,
        0x0D05...0x0D0C,
        0x0D0E...0x0D10,
        0x0D12...0x0D28,
        0x0D2A...0x0D39,
        0x0D3E...0x0D43,
        0x0D46...0x0D48,
        0x0D4A...0x0D4D,
        0x0D60...0x0D61,

        // Thai (excluding digits 0x0E50...0x0E59, which are listed under
        // "Digits" below; originally 0x0E01...0x0E3A and 0x0E40...0x0E5B)
        0x0E01...0x0E3A,
        0x0E40...0x0E4F,
        0x0E5A...0x0E5B,

        // Lao
        0x0E81...0x0E82,
        0x0E84,
        0x0E87...0x0E88,
        0x0E8A,
        0x0E8D,
        0x0E94...0x0E97,
        0x0E99...0x0E9F,
        0x0EA1...0x0EA3,
        0x0EA5,
        0x0EA7,
        0x0EAA...0x0EAB,
        0x0EAD...0x0EAE,
        0x0EB0...0x0EB9,
        0x0EBB...0x0EBD,
        0x0EC0...0x0EC4,
        0x0EC6,
        0x0EC8...0x0ECD,
        0x0EDC...0x0EDD,

        // Tibetan
        0x0F00,
        0x0F18...0x0F19,
        0x0F35,
        0x0F37,
        0x0F39,
        0x0F3E...0x0F47,
        0x0F49...0x0F69,
        0x0F71...0x0F84,
        0x0F86...0x0F8B,
        0x0F90...0x0F95,
        0x0F97,
        0x0F99...0x0FAD,
        0x0FB1...0x0FB7,
        0x0FB9,

        // Georgian
        0x10A0...0x10C5,
        0x10D0...0x10F6,

        // Hiragana
        0x3041...0x3093,
        0x309B...0x309C,

        // Katakana
        0x30A1...0x30F6,
        0x30FB...0x30FC,

        // Bopomofo
        0x3105...0x312C,

        // CJK Unified Ideographs
        0x4E00...0x9FA5,

        // Hangul
        0xAC00...0xD7A3,

        // Digits
        0x0660...0x0669,
        0x06F0...0x06F9,
        0x0966...0x096F,
        0x09E6...0x09EF,
        0x0A66...0x0A6F,
        0x0AE6...0x0AEF,
        0x0B66...0x0B6F,
        0x0BE7...0x0BEF,
        0x0C66...0x0C6F,
        0x0CE6...0x0CEF,
        0x0D66...0x0D6F,
        0x0E50...0x0E59,
        0x0ED0...0x0ED9,
        0x0F20...0x0F33,

        // Special characters
        0x00B5,
        0x00B7,
        0x02B0...0x02B8,
        0x02BB,
        0x02BD...0x02C1,
        0x02D0...0x02D1,
        0x02E0...0x02E4,
        0x037A,
        0x0559,
        0x093D,
        0x0B3D,
        0x1FBE,
        0x203F...0x2040,
        0x2102,
        0x2107,
        0x210A...0x2113,
        0x2115,
        0x2118...0x211D,
        0x2124,
        0x2126,
        0x2128,
        0x212A...0x2131,
        0x2133...0x2138,
        0x2160...0x2182,
        0x3005...0x3007,
        0x3021...0x3029,
        => true,
        else => false,
    };
}
/// C11 standard Annex D
///
/// Returns true when `codepoint` lies in one of the four ranges below, which
/// this module treats as not permitted as the first character of a C11
/// identifier. Only codepoints above 0x7F may be passed (asserted).
pub fn isC11DisallowedInitialIdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        0x0300...0x036F,
        0x1DC0...0x1DFF,
        0x20D0...0x20FF,
        0xFE20...0xFE2F,
        => true,
        else => false,
    };
}
/// These are "digit" characters; C99 disallows them as the first
/// character of an identifier.
///
/// Returns true when `codepoint` lies in one of the digit ranges below (the
/// same ranges that appear under "Digits" in `isC99IdChar`). Only codepoints
/// above 0x7F may be passed (asserted).
pub fn isC99DisallowedInitialIDChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        0x0660...0x0669,
        0x06F0...0x06F9,
        0x0966...0x096F,
        0x09E6...0x09EF,
        0x0A66...0x0A6F,
        0x0AE6...0x0AEF,
        0x0B66...0x0B6F,
        0x0BE7...0x0BEF,
        0x0C66...0x0C6F,
        0x0CE6...0x0CEF,
        0x0D66...0x0D6F,
        0x0E50...0x0E59,
        0x0ED0...0x0ED9,
        0x0F20...0x0F33,
        => true,
        else => false,
    };
}
/// Returns true when `codepoint` is one of the fixed set of characters listed
/// below (soft hyphen, zero-width spaces and joiners, the invisible
/// mathematical operators, and the zero-width no-break space).
/// Only codepoints above 0x7F may be passed (asserted).
pub fn isInvisible(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        0x00ad, // SOFT HYPHEN
        0x200b, // ZERO WIDTH SPACE
        0x200c, // ZERO WIDTH NON-JOINER
        0x200d, // ZERO WIDTH JOINER
        0x2060, // WORD JOINER
        0x2061, // FUNCTION APPLICATION
        0x2062, // INVISIBLE TIMES
        0x2063, // INVISIBLE SEPARATOR
        0x2064, // INVISIBLE PLUS
        0xfeff, // ZERO WIDTH NO-BREAK SPACE
        => true,
        else => false,
    };
}
/// Checks for identifier characters which resemble non-identifier characters.
///
/// Returns the ASCII character that `codepoint` is listed as resembling, or
/// `null` when the codepoint is not in the table. Only codepoints above 0x7F
/// may be passed (asserted).
pub fn homoglyph(codepoint: u21) ?u21 {
    assert(codepoint > 0x7F);
    return switch (codepoint) {
        0x01c3 => '!', // LATIN LETTER RETROFLEX CLICK
        0x037e => ';', // GREEK QUESTION MARK
        0x2212 => '-', // MINUS SIGN
        0x2215 => '/', // DIVISION SLASH
        0x2216 => '\\', // SET MINUS
        0x2217 => '*', // ASTERISK OPERATOR
        0x2223 => '|', // DIVIDES
        0x2227 => '^', // LOGICAL AND
        0x2236 => ':', // RATIO
        0x223c => '~', // TILDE OPERATOR
        0xa789 => ':', // MODIFIER LETTER COLON
        0xff01 => '!', // FULLWIDTH EXCLAMATION MARK
        0xff03 => '#', // FULLWIDTH NUMBER SIGN
        0xff04 => '$', // FULLWIDTH DOLLAR SIGN
        0xff05 => '%', // FULLWIDTH PERCENT SIGN
        0xff06 => '&', // FULLWIDTH AMPERSAND
        0xff08 => '(', // FULLWIDTH LEFT PARENTHESIS
        0xff09 => ')', // FULLWIDTH RIGHT PARENTHESIS
        0xff0a => '*', // FULLWIDTH ASTERISK
        0xff0b => '+', // FULLWIDTH PLUS SIGN
        0xff0c => ',', // FULLWIDTH COMMA
        0xff0d => '-', // FULLWIDTH HYPHEN-MINUS
        0xff0e => '.', // FULLWIDTH FULL STOP
        0xff0f => '/', // FULLWIDTH SOLIDUS
        0xff1a => ':', // FULLWIDTH COLON
        0xff1b => ';', // FULLWIDTH SEMICOLON
        0xff1c => '<', // FULLWIDTH LESS-THAN SIGN
        0xff1d => '=', // FULLWIDTH EQUALS SIGN
        0xff1e => '>', // FULLWIDTH GREATER-THAN SIGN
        0xff1f => '?', // FULLWIDTH QUESTION MARK
        0xff20 => '@', // FULLWIDTH COMMERCIAL AT
        0xff3b => '[', // FULLWIDTH LEFT SQUARE BRACKET
        0xff3c => '\\', // FULLWIDTH REVERSE SOLIDUS
        0xff3d => ']', // FULLWIDTH RIGHT SQUARE BRACKET
        0xff3e => '^', // FULLWIDTH CIRCUMFLEX ACCENT
        0xff5b => '{', // FULLWIDTH LEFT CURLY BRACKET
        0xff5c => '|', // FULLWIDTH VERTICAL LINE
        0xff5d => '}', // FULLWIDTH RIGHT CURLY BRACKET
        0xff5e => '~', // FULLWIDTH TILDE
        else => null,
    };
}
/// Looks `c` up in the `tables.trie_start` / `tables.leaf` bitmap and returns
/// whether its bit is set. Only codepoints above 0x7F may be passed
/// (asserted).
///
/// The module header states this lookup is adapted from
/// https://github.com/dtolnay/unicode-ident; the table layout is defined in
/// `char_info/identifier_tables.zig`, which is not visible from here.
pub fn isXidStart(c: u21) bool {
    assert(c > 0x7F);
    // Each leaf byte covers 8 codepoints; `idx` selects the trie entry for
    // the group of `tables.chunk` leaf bytes that `c` falls into.
    const idx = c / 8 / tables.chunk;
    // Codepoints past the end of the trie fall back to leaf chunk 0
    // (presumably an all-zero chunk — confirm against the generated tables).
    const chunk: usize = if (idx < tables.trie_start.len) tables.trie_start[idx] else 0;
    // NOTE(review): the `/ 2` scaling of the chunk index follows the table
    // generator's layout; verify before changing.
    const offset = chunk * tables.chunk / 2 + c / 8 % tables.chunk;
    // Test bit `c % 8` of the selected leaf byte.
    return (tables.leaf[offset] >> (@as(u3, @intCast(c % 8)))) & 1 != 0;
}
/// Looks `c` up in the `tables.trie_continue` / `tables.leaf` bitmap and
/// returns whether its bit is set. Only codepoints above 0x7F may be passed
/// (asserted).
///
/// The module header states this lookup is adapted from
/// https://github.com/dtolnay/unicode-ident; the table layout is defined in
/// `char_info/identifier_tables.zig`, which is not visible from here.
pub fn isXidContinue(c: u21) bool {
    assert(c > 0x7F);
    // Each leaf byte covers 8 codepoints; `idx` selects the trie entry for
    // the group of `tables.chunk` leaf bytes that `c` falls into.
    const idx = c / 8 / tables.chunk;
    // Codepoints past the end of the trie fall back to leaf chunk 0
    // (presumably an all-zero chunk — confirm against the generated tables).
    const chunk: usize = if (idx < tables.trie_continue.len) tables.trie_continue[idx] else 0;
    // NOTE(review): the `/ 2` scaling of the chunk index follows the table
    // generator's layout; verify before changing.
    const offset = chunk * tables.chunk / 2 + c / 8 % tables.chunk;
    // Test bit `c % 8` of the selected leaf byte.
    return (tables.leaf[offset] >> (@as(u3, @intCast(c % 8)))) & 1 != 0;
}
// Calls both lookups for every codepoint from 0x80 up to 0x10FFFF that
// `std.unicode.utf8ValidCodepoint` accepts. The results are discarded: the
// test only verifies that no lookup trips a safety check (for example an
// out-of-bounds table index).
test "isXidStart / isXidContinue panic check" {
    const std = @import("std");
    for (0x80..0x110000) |i| {
        const c: u21 = @intCast(i);
        if (std.unicode.utf8ValidCodepoint(c)) {
            _ = isXidStart(c);
            _ = isXidContinue(c);
        }
    }
}
// Spot checks: none of these codepoints may start an identifier.
// The literals are written as escapes so the file survives re-encoding.
test isXidStart {
    const std = @import("std");
    try std.testing.expect(!isXidStart('\u{1811}')); // MONGOLIAN DIGIT ONE
    try std.testing.expect(!isXidStart('\u{2122}')); // TRADE MARK SIGN
    try std.testing.expect(!isXidStart('\u{a3}')); // POUND SIGN
    try std.testing.expect(!isXidStart('\u{1f914}')); // 🤔
}
// Spot checks: a non-ASCII digit may continue an identifier, while symbols
// and emoji may not.
// The literals are written as escapes so the file survives re-encoding.
test isXidContinue {
    const std = @import("std");
    try std.testing.expect(isXidContinue('\u{1811}')); // MONGOLIAN DIGIT ONE
    try std.testing.expect(!isXidContinue('\u{2122}')); // TRADE MARK SIGN
    try std.testing.expect(!isXidContinue('\u{a3}')); // POUND SIGN
    try std.testing.expect(!isXidContinue('\u{1f914}')); // 🤔
}
/// Three-valued result of the per-codepoint check done by `isNormalized`.
/// Presumably mirrors the Unicode NFC_Quick_Check property values
/// (No / Maybe / Yes) — confirm against the data the tables were built from.
pub const NfcQuickCheck = enum {
    no,
    maybe,
    yes,
};
/// Classifies a single codepoint for a normalization quick check.
///
/// Returns `.no` for codepoints in the first list below, `.maybe` for
/// codepoints in the second list, and `.yes` for everything else.
/// Unlike the identifier routines in this file, this function does not
/// assert `codepoint > 0x7F`; any `u21` value is accepted.
/// The lists presumably follow Unicode's NFC_Quick_Check property — confirm
/// against DerivedNormalizationProps.txt before editing them.
pub fn isNormalized(codepoint: u21) NfcQuickCheck {
    return switch (codepoint) {
        0x0340...0x0341,
        0x0343...0x0344,
        0x0374,
        0x037E,
        0x0387,
        0x0958...0x095F,
        0x09DC...0x09DD,
        0x09DF,
        0x0A33,
        0x0A36,
        0x0A59...0x0A5B,
        0x0A5E,
        0x0B5C...0x0B5D,
        0x0F43,
        0x0F4D,
        0x0F52,
        0x0F57,
        0x0F5C,
        0x0F69,
        0x0F73,
        0x0F75...0x0F76,
        0x0F78,
        0x0F81,
        0x0F93,
        0x0F9D,
        0x0FA2,
        0x0FA7,
        0x0FAC,
        0x0FB9,
        0x1F71,
        0x1F73,
        0x1F75,
        0x1F77,
        0x1F79,
        0x1F7B,
        0x1F7D,
        0x1FBB,
        0x1FBE,
        0x1FC9,
        0x1FCB,
        0x1FD3,
        0x1FDB,
        0x1FE3,
        0x1FEB,
        0x1FEE...0x1FEF,
        0x1FF9,
        0x1FFB,
        0x1FFD,
        0x2000...0x2001,
        0x2126,
        0x212A...0x212B,
        0x2329,
        0x232A,
        0x2ADC,
        0xF900...0xFA0D,
        0xFA10,
        0xFA12,
        0xFA15...0xFA1E,
        0xFA20,
        0xFA22,
        0xFA25...0xFA26,
        0xFA2A...0xFA6D,
        0xFA70...0xFAD9,
        0xFB1D,
        0xFB1F,
        0xFB2A...0xFB36,
        0xFB38...0xFB3C,
        0xFB3E,
        0xFB40...0xFB41,
        0xFB43...0xFB44,
        0xFB46...0xFB4E,
        0x1D15E...0x1D164,
        0x1D1BB...0x1D1C0,
        0x2F800...0x2FA1D,
        => .no,
        0x0300...0x0304,
        0x0306...0x030C,
        0x030F,
        0x0311,
        0x0313...0x0314,
        0x031B,
        0x0323...0x0328,
        0x032D...0x032E,
        0x0330...0x0331,
        0x0338,
        0x0342,
        0x0345,
        0x0653...0x0655,
        0x093C,
        0x09BE,
        0x09D7,
        0x0B3E,
        0x0B56,
        0x0B57,
        0x0BBE,
        0x0BD7,
        0x0C56,
        0x0CC2,
        0x0CD5...0x0CD6,
        0x0D3E,
        0x0D57,
        0x0DCA,
        0x0DCF,
        0x0DDF,
        0x102E,
        0x1161...0x1175,
        0x11A8...0x11C2,
        0x1B35,
        0x3099...0x309A,
        0x110BA,
        0x11127,
        0x1133E,
        0x11357,
        0x114B0,
        0x114BA,
        0x114BD,
        0x115AF,
        => .maybe,
        else => .yes,
    };
}
/// Canonical combining class of a codepoint, as returned by
/// `getCanonicalClass`. The `u8` tag of each member is the numeric class
/// value; classes without a descriptive name are spelled `cccN`.
pub const CanonicalCombiningClass = enum(u8) {
    not_reordered = 0,
    overlay = 1,
    han_reading = 6,
    nukta = 7,
    kana_voicing = 8,
    virama = 9,
    ccc10 = 10,
    ccc11 = 11,
    ccc12 = 12,
    ccc13 = 13,
    ccc14 = 14,
    ccc15 = 15,
    ccc16 = 16,
    ccc17 = 17,
    ccc18 = 18,
    ccc19 = 19,
    ccc20 = 20,
    ccc21 = 21,
    ccc22 = 22,
    ccc23 = 23,
    ccc24 = 24,
    ccc25 = 25,
    ccc26 = 26,
    ccc27 = 27,
    ccc28 = 28,
    ccc29 = 29,
    ccc30 = 30,
    ccc31 = 31,
    ccc32 = 32,
    ccc33 = 33,
    ccc34 = 34,
    ccc35 = 35,
    ccc36 = 36,
    ccc84 = 84,
    ccc91 = 91,
    ccc103 = 103,
    ccc107 = 107,
    ccc118 = 118,
    ccc122 = 122,
    ccc129 = 129,
    ccc130 = 130,
    ccc132 = 132,
    attached_below = 202,
    attached_above = 214,
    attached_above_right = 216,
    below_left = 218,
    below = 220,
    below_right = 222,
    left = 224,
    right = 226,
    above_left = 228,
    above = 230,
    above_right = 232,
    double_below = 233,
    double_above = 234,
    iota_subscript = 240,
};
/// Returns the canonical combining class assigned to `codepoint` by the
/// table below; any codepoint not listed is `.not_reordered`.
/// No range assertion is made: any `u21` value is accepted.
/// The table looks machine-generated (presumably from the Unicode Character
/// Database); single codepoints are written as plain values rather than as
/// degenerate `X...X` ranges.
pub fn getCanonicalClass(codepoint: u21) CanonicalCombiningClass {
    return switch (codepoint) {
        0x300...0x314 => .above,
        0x315 => .above_right,
        0x316...0x319 => .below,
        0x31A => .above_right,
        0x31B => .attached_above_right,
        0x31C...0x320 => .below,
        0x321...0x322 => .attached_below,
        0x323...0x326 => .below,
        0x327...0x328 => .attached_below,
        0x329...0x333 => .below,
        0x334...0x338 => .overlay,
        0x339...0x33C => .below,
        0x33D...0x344 => .above,
        0x345 => .iota_subscript,
        0x346 => .above,
        0x347...0x349 => .below,
        0x34A...0x34C => .above,
        0x34D...0x34E => .below,
        0x350...0x352 => .above,
        0x353...0x356 => .below,
        0x357 => .above,
        0x358 => .above_right,
        0x359...0x35A => .below,
        0x35B => .above,
        0x35C => .double_below,
        0x35D...0x35E => .double_above,
        0x35F => .double_below,
        0x360...0x361 => .double_above,
        0x362 => .double_below,
        0x363...0x36F => .above,
        0x483...0x487 => .above,
        0x591 => .below,
        0x592...0x595 => .above,
        0x596 => .below,
        0x597...0x599 => .above,
        0x59A => .below_right,
        0x59B => .below,
        0x59C...0x5A1 => .above,
        0x5A2...0x5A7 => .below,
        0x5A8...0x5A9 => .above,
        0x5AA => .below,
        0x5AB...0x5AC => .above,
        0x5AD => .below_right,
        0x5AE => .above_left,
        0x5AF => .above,
        0x5B0 => .ccc10,
        0x5B1 => .ccc11,
        0x5B2 => .ccc12,
        0x5B3 => .ccc13,
        0x5B4 => .ccc14,
        0x5B5 => .ccc15,
        0x5B6 => .ccc16,
        0x5B7 => .ccc17,
        0x5B8 => .ccc18,
        0x5B9...0x5BA => .ccc19,
        0x5BB => .ccc20,
        0x5BC => .ccc21,
        0x5BD => .ccc22,
        0x5BF => .ccc23,
        0x5C1 => .ccc24,
        0x5C2 => .ccc25,
        0x5C4 => .above,
        0x5C5 => .below,
        0x5C7 => .ccc18,
        0x610...0x617 => .above,
        0x618 => .ccc30,
        0x619 => .ccc31,
        0x61A => .ccc32,
        0x64B => .ccc27,
        0x64C => .ccc28,
        0x64D => .ccc29,
        0x64E => .ccc30,
        0x64F => .ccc31,
        0x650 => .ccc32,
        0x651 => .ccc33,
        0x652 => .ccc34,
        0x653...0x654 => .above,
        0x655...0x656 => .below,
        0x657...0x65B => .above,
        0x65C => .below,
        0x65D...0x65E => .above,
        0x65F => .below,
        0x670 => .ccc35,
        0x6D6...0x6DC => .above,
        0x6DF...0x6E2 => .above,
        0x6E3 => .below,
        0x6E4 => .above,
        0x6E7...0x6E8 => .above,
        0x6EA => .below,
        0x6EB...0x6EC => .above,
        0x6ED => .below,
        0x711 => .ccc36,
        0x730 => .above,
        0x731 => .below,
        0x732...0x733 => .above,
        0x734 => .below,
        0x735...0x736 => .above,
        0x737...0x739 => .below,
        0x73A => .above,
        0x73B...0x73C => .below,
        0x73D => .above,
        0x73E => .below,
        0x73F...0x741 => .above,
        0x742 => .below,
        0x743 => .above,
        0x744 => .below,
        0x745 => .above,
        0x746 => .below,
        0x747 => .above,
        0x748 => .below,
        0x749...0x74A => .above,
        0x7EB...0x7F1 => .above,
        0x7F2 => .below,
        0x7F3 => .above,
        0x7FD => .below,
        0x816...0x819 => .above,
        0x81B...0x823 => .above,
        0x825...0x827 => .above,
        0x829...0x82D => .above,
        0x859...0x85B => .below,
        0x898 => .above,
        0x899...0x89B => .below,
        0x89C...0x89F => .above,
        0x8CA...0x8CE => .above,
        0x8CF...0x8D3 => .below,
        0x8D4...0x8E1 => .above,
        0x8E3 => .below,
        0x8E4...0x8E5 => .above,
        0x8E6 => .below,
        0x8E7...0x8E8 => .above,
        0x8E9 => .below,
        0x8EA...0x8EC => .above,
        0x8ED...0x8EF => .below,
        0x8F0 => .ccc27,
        0x8F1 => .ccc28,
        0x8F2 => .ccc29,
        0x8F3...0x8F5 => .above,
        0x8F6 => .below,
        0x8F7...0x8F8 => .above,
        0x8F9...0x8FA => .below,
        0x8FB...0x8FF => .above,
        0x93C => .nukta,
        0x94D => .virama,
        0x951 => .above,
        0x952 => .below,
        0x953...0x954 => .above,
        0x9BC => .nukta,
        0x9CD => .virama,
        0x9FE => .above,
        0xA3C => .nukta,
        0xA4D => .virama,
        0xABC => .nukta,
        0xACD => .virama,
        0xB3C => .nukta,
        0xB4D => .virama,
        0xBCD => .virama,
        0xC3C => .nukta,
        0xC4D => .virama,
        0xC55 => .ccc84,
        0xC56 => .ccc91,
        0xCBC => .nukta,
        0xCCD => .virama,
        0xD3B...0xD3C => .virama,
        0xD4D => .virama,
        0xDCA => .virama,
        0xE38...0xE39 => .ccc103,
        0xE3A => .virama,
        0xE48...0xE4B => .ccc107,
        0xEB8...0xEB9 => .ccc118,
        0xEBA => .virama,
        0xEC8...0xECB => .ccc122,
        0xF18...0xF19 => .below,
        0xF35 => .below,
        0xF37 => .below,
        0xF39 => .attached_above_right,
        0xF71 => .ccc129,
        0xF72 => .ccc130,
        0xF74 => .ccc132,
        0xF7A...0xF7D => .ccc130,
        0xF80 => .ccc130,
        0xF82...0xF83 => .above,
        0xF84 => .virama,
        0xF86...0xF87 => .above,
        0xFC6 => .below,
        0x1037 => .nukta,
        0x1039...0x103A => .virama,
        0x108D => .below,
        0x135D...0x135F => .above,
        0x1714...0x1715 => .virama,
        0x1734 => .virama,
        0x17D2 => .virama,
        0x17DD => .above,
        0x18A9 => .above_left,
        0x1939 => .below_right,
        0x193A => .above,
        0x193B => .below,
        0x1A17 => .above,
        0x1A18 => .below,
        0x1A60 => .virama,
        0x1A75...0x1A7C => .above,
        0x1A7F => .below,
        0x1AB0...0x1AB4 => .above,
        0x1AB5...0x1ABA => .below,
        0x1ABB...0x1ABC => .above,
        0x1ABD => .below,
        0x1ABF...0x1AC0 => .below,
        0x1AC1...0x1AC2 => .above,
        0x1AC3...0x1AC4 => .below,
        0x1AC5...0x1AC9 => .above,
        0x1ACA => .below,
        0x1ACB...0x1ACE => .above,
        0x1B34 => .nukta,
        0x1B44 => .virama,
        0x1B6B => .above,
        0x1B6C => .below,
        0x1B6D...0x1B73 => .above,
        0x1BAA...0x1BAB => .virama,
        0x1BE6 => .nukta,
        0x1BF2...0x1BF3 => .virama,
        0x1C37 => .nukta,
        0x1CD0...0x1CD2 => .above,
        0x1CD4 => .overlay,
        0x1CD5...0x1CD9 => .below,
        0x1CDA...0x1CDB => .above,
        0x1CDC...0x1CDF => .below,
        0x1CE0 => .above,
        0x1CE2...0x1CE8 => .overlay,
        0x1CED => .below,
        0x1CF4 => .above,
        0x1CF8...0x1CF9 => .above,
        0x1DC0...0x1DC1 => .above,
        0x1DC2 => .below,
        0x1DC3...0x1DC9 => .above,
        0x1DCA => .below,
        0x1DCB...0x1DCC => .above,
        0x1DCD => .double_above,
        0x1DCE => .attached_above,
        0x1DCF => .below,
        0x1DD0 => .attached_below,
        0x1DD1...0x1DF5 => .above,
        0x1DF6 => .above_right,
        0x1DF7...0x1DF8 => .above_left,
        0x1DF9 => .below,
        0x1DFA => .below_left,
        0x1DFB => .above,
        0x1DFC => .double_below,
        0x1DFD => .below,
        0x1DFE => .above,
        0x1DFF => .below,
        0x20D0...0x20D1 => .above,
        0x20D2...0x20D3 => .overlay,
        0x20D4...0x20D7 => .above,
        0x20D8...0x20DA => .overlay,
        0x20DB...0x20DC => .above,
        0x20E1 => .above,
        0x20E5...0x20E6 => .overlay,
        0x20E7 => .above,
        0x20E8 => .below,
        0x20E9 => .above,
        0x20EA...0x20EB => .overlay,
        0x20EC...0x20EF => .below,
        0x20F0 => .above,
        0x2CEF...0x2CF1 => .above,
        0x2D7F => .virama,
        0x2DE0...0x2DFF => .above,
        0x302A => .below_left,
        0x302B => .above_left,
        0x302C => .above_right,
        0x302D => .below_right,
        0x302E...0x302F => .left,
        0x3099...0x309A => .kana_voicing,
        0xA66F => .above,
        0xA674...0xA67D => .above,
        0xA69E...0xA69F => .above,
        0xA6F0...0xA6F1 => .above,
        0xA806 => .virama,
        0xA82C => .virama,
        0xA8C4 => .virama,
        0xA8E0...0xA8F1 => .above,
        0xA92B...0xA92D => .below,
        0xA953 => .virama,
        0xA9B3 => .nukta,
        0xA9C0 => .virama,
        0xAAB0 => .above,
        0xAAB2...0xAAB3 => .above,
        0xAAB4 => .below,
        0xAAB7...0xAAB8 => .above,
        0xAABE...0xAABF => .above,
        0xAAC1 => .above,
        0xAAF6 => .virama,
        0xABED => .virama,
        0xFB1E => .ccc26,
        0xFE20...0xFE26 => .above,
        0xFE27...0xFE2D => .below,
        0xFE2E...0xFE2F => .above,
        0x101FD => .below,
        0x102E0 => .below,
        0x10376...0x1037A => .above,
        0x10A0D => .below,
        0x10A0F => .above,
        0x10A38 => .above,
        0x10A39 => .overlay,
        0x10A3A => .below,
        0x10A3F => .virama,
        0x10AE5 => .above,
        0x10AE6 => .below,
        0x10D24...0x10D27 => .above,
        0x10EAB...0x10EAC => .above,
        0x10EFD...0x10EFF => .below,
        0x10F46...0x10F47 => .below,
        0x10F48...0x10F4A => .above,
        0x10F4B => .below,
        0x10F4C => .above,
        0x10F4D...0x10F50 => .below,
        0x10F82 => .above,
        0x10F83 => .below,
        0x10F84 => .above,
        0x10F85 => .below,
        0x11046 => .virama,
        0x11070 => .virama,
        0x1107F => .virama,
        0x110B9 => .virama,
        0x110BA => .nukta,
        0x11100...0x11102 => .above,
        0x11133...0x11134 => .virama,
        0x11173 => .nukta,
        0x111C0 => .virama,
        0x111CA => .nukta,
        0x11235 => .virama,
        0x11236 => .nukta,
        0x112E9 => .nukta,
        0x112EA => .virama,
        0x1133B...0x1133C => .nukta,
        0x1134D => .virama,
        0x11366...0x1136C => .above,
        0x11370...0x11374 => .above,
        0x11442 => .virama,
        0x11446 => .nukta,
        0x1145E => .above,
        0x114C2 => .virama,
        0x114C3 => .nukta,
        0x115BF => .virama,
        0x115C0 => .nukta,
        0x1163F => .virama,
        0x116B6 => .virama,
        0x116B7 => .nukta,
        0x1172B => .virama,
        0x11839 => .virama,
        0x1183A => .nukta,
        0x1193D...0x1193E => .virama,
        0x11943 => .nukta,
        0x119E0 => .virama,
        0x11A34 => .virama,
        0x11A47 => .virama,
        0x11A99 => .virama,
        0x11C3F => .virama,
        0x11D42 => .nukta,
        0x11D44...0x11D45 => .virama,
        0x11D97 => .virama,
        0x11F41...0x11F42 => .virama,
        0x16AF0...0x16AF4 => .overlay,
        0x16B30...0x16B36 => .above,
        0x16FF0...0x16FF1 => .han_reading,
        0x1BC9E => .overlay,
        0x1D165...0x1D166 => .attached_above_right,
        0x1D167...0x1D169 => .overlay,
        0x1D16D => .right,
        0x1D16E...0x1D172 => .attached_above_right,
        0x1D17B...0x1D182 => .below,
        0x1D185...0x1D189 => .above,
        0x1D18A...0x1D18B => .below,
        0x1D1AA...0x1D1AD => .above,
        0x1D242...0x1D244 => .above,
        0x1E000...0x1E006 => .above,
        0x1E008...0x1E018 => .above,
        0x1E01B...0x1E021 => .above,
        0x1E023...0x1E024 => .above,
        0x1E026...0x1E02A => .above,
        0x1E08F => .above,
        0x1E130...0x1E136 => .above,
        0x1E2AE => .above,
        0x1E2EC...0x1E2EF => .above,
        0x1E4EC...0x1E4ED => .above_right,
        0x1E4EE => .below,
        0x1E4EF => .above,
        0x1E8D0...0x1E8D6 => .below,
        0x1E944...0x1E949 => .above,
        0x1E94A => .nukta,
        else => .not_reordered,
    };
}