zig/lib/compiler/aro/aro/char_info.zig
Andrew Kelley 240d0b68f6 make aro-based translate-c lazily built from source
Part of #19063.

Primarily, this moves Aro from deps/ to lib/compiler/ so that it can be
lazily compiled from source. src/aro_translate_c.zig is moved to
lib/compiler/aro_translate_c.zig and some of Zig CLI logic moved to a
main() function there.

aro_translate_c.zig becomes the "common" import for clang-based
translate-c.

Not all of the compiler was able to be detangled from Aro, however, so
it still, for now, remains being compiled with the main compiler
sources due to the clang-based translate-c depending on it. Once
aro-based translate-c achieves feature parity with the clang-based
translate-c implementation, the clang-based one can be removed from Zig.

Aro made it unnecessarily difficult to depend on with these .def files
and all these Zig module requirements. I looked at the .def files and
made these observations:

- The canonical source is llvm .def files.
- Therefore there is an update process to sync with llvm that involves
  regenerating the .def files in Aro.
- Therefore you might as well just regenerate the .zig files directly
  and check those into Aro.
- Also with a small amount of tinkering, the file size on disk of these
  generated .zig files can be made many times smaller, without
  compromising type safety in the usage of the data.

This would make things much easier on Zig as downstream project,
particularly we could remove those pesky stubs when bootstrapping.

I have gone ahead with these changes since they unblock me and I will
have a chat with Vexu to see what he thinks.
2024-02-28 13:21:05 -07:00

1112 lines
30 KiB
Zig

//! This module provides functions for classifying characters according to
//! various C standards. The classification routines *do not* consider
//! characters from the basic character set; it is assumed those will be
//! checked separately.
//!
//! isXidStart and isXidContinue are adapted from https://github.com/dtolnay/unicode-ident
const assert = @import("std").debug.assert;
const tables = @import("char_info/identifier_tables.zig");
/// Reports whether `codepoint` falls in one of the identifier character
/// ranges of C11 Standard Annex D.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isC11IdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);

    // Group 8: every plane from 0x1xxxx through 0xExxxx is accepted except
    // for its final two codepoints (xFFFE and xFFFF).
    if (codepoint >= 0x10000) {
        return codepoint <= 0xEFFFD and (codepoint & 0xFFFF) <= 0xFFFD;
    }

    // Groups 1 through 7 all live below 0x10000.
    switch (codepoint) {
        // Group 1
        0x00A8,
        0x00AA,
        0x00AD,
        0x00AF,
        0x00B2...0x00B5,
        0x00B7...0x00BA,
        0x00BC...0x00BE,
        0x00C0...0x00D6,
        0x00D8...0x00F6,
        0x00F8...0x00FF,
        // Group 2
        0x0100...0x167F,
        0x1681...0x180D,
        0x180F...0x1FFF,
        // Group 3
        0x200B...0x200D,
        0x202A...0x202E,
        0x203F...0x2040,
        0x2054,
        0x2060...0x206F,
        // Group 4
        0x2070...0x218F,
        0x2460...0x24FF,
        0x2776...0x2793,
        0x2C00...0x2DFF,
        0x2E80...0x2FFF,
        // Group 5
        0x3004...0x3007,
        0x3021...0x302F,
        0x3031...0x303F,
        // Group 6
        0x3040...0xD7FF,
        // Group 7
        0xF900...0xFD3D,
        0xFD40...0xFDCF,
        0xFDF0...0xFE44,
        0xFE47...0xFFFD,
        => return true,
        else => return false,
    }
}
/// Reports whether `codepoint` appears in the identifier character list of
/// C99 Standard Annex D.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isC99IdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);

    // Stage 1: the per-script letter ranges.
    switch (codepoint) {
        // Latin
        0x00AA,
        0x00BA,
        0x00C0...0x00D6,
        0x00D8...0x00F6,
        0x00F8...0x01F5,
        0x01FA...0x0217,
        0x0250...0x02A8,
        0x1E00...0x1E9B,
        0x1EA0...0x1EF9,
        0x207F,
        // Greek
        0x0386,
        0x0388...0x038A,
        0x038C,
        0x038E...0x03A1,
        0x03A3...0x03CE,
        0x03D0...0x03D6,
        0x03DA,
        0x03DC,
        0x03DE,
        0x03E0,
        0x03E2...0x03F3,
        0x1F00...0x1F15,
        0x1F18...0x1F1D,
        0x1F20...0x1F45,
        0x1F48...0x1F4D,
        0x1F50...0x1F57,
        0x1F59,
        0x1F5B,
        0x1F5D,
        0x1F5F...0x1F7D,
        0x1F80...0x1FB4,
        0x1FB6...0x1FBC,
        0x1FC2...0x1FC4,
        0x1FC6...0x1FCC,
        0x1FD0...0x1FD3,
        0x1FD6...0x1FDB,
        0x1FE0...0x1FEC,
        0x1FF2...0x1FF4,
        0x1FF6...0x1FFC,
        // Cyrillic
        0x0401...0x040C,
        0x040E...0x044F,
        0x0451...0x045C,
        0x045E...0x0481,
        0x0490...0x04C4,
        0x04C7...0x04C8,
        0x04CB...0x04CC,
        0x04D0...0x04EB,
        0x04EE...0x04F5,
        0x04F8...0x04F9,
        // Armenian
        0x0531...0x0556,
        0x0561...0x0587,
        // Hebrew
        0x05B0...0x05B9,
        0x05BB...0x05BD,
        0x05BF,
        0x05C1...0x05C2,
        0x05D0...0x05EA,
        0x05F0...0x05F2,
        // Arabic
        0x0621...0x063A,
        0x0640...0x0652,
        0x0670...0x06B7,
        0x06BA...0x06BE,
        0x06C0...0x06CE,
        0x06D0...0x06DC,
        0x06E5...0x06E8,
        0x06EA...0x06ED,
        // Devanagari
        0x0901...0x0903,
        0x0905...0x0939,
        0x093E...0x094D,
        0x0950...0x0952,
        0x0958...0x0963,
        // Bengali
        0x0981...0x0983,
        0x0985...0x098C,
        0x098F...0x0990,
        0x0993...0x09A8,
        0x09AA...0x09B0,
        0x09B2,
        0x09B6...0x09B9,
        0x09BE...0x09C4,
        0x09C7...0x09C8,
        0x09CB...0x09CD,
        0x09DC...0x09DD,
        0x09DF...0x09E3,
        0x09F0...0x09F1,
        // Gurmukhi
        0x0A02,
        0x0A05...0x0A0A,
        0x0A0F...0x0A10,
        0x0A13...0x0A28,
        0x0A2A...0x0A30,
        0x0A32...0x0A33,
        0x0A35...0x0A36,
        0x0A38...0x0A39,
        0x0A3E...0x0A42,
        0x0A47...0x0A48,
        0x0A4B...0x0A4D,
        0x0A59...0x0A5C,
        0x0A5E,
        0x0A74,
        // Gujarati
        0x0A81...0x0A83,
        0x0A85...0x0A8B,
        0x0A8D,
        0x0A8F...0x0A91,
        0x0A93...0x0AA8,
        0x0AAA...0x0AB0,
        0x0AB2...0x0AB3,
        0x0AB5...0x0AB9,
        0x0ABD...0x0AC5,
        0x0AC7...0x0AC9,
        0x0ACB...0x0ACD,
        0x0AD0,
        0x0AE0,
        // Oriya
        0x0B01...0x0B03,
        0x0B05...0x0B0C,
        0x0B0F...0x0B10,
        0x0B13...0x0B28,
        0x0B2A...0x0B30,
        0x0B32...0x0B33,
        0x0B36...0x0B39,
        0x0B3E...0x0B43,
        0x0B47...0x0B48,
        0x0B4B...0x0B4D,
        0x0B5C...0x0B5D,
        0x0B5F...0x0B61,
        // Tamil
        0x0B82...0x0B83,
        0x0B85...0x0B8A,
        0x0B8E...0x0B90,
        0x0B92...0x0B95,
        0x0B99...0x0B9A,
        0x0B9C,
        0x0B9E...0x0B9F,
        0x0BA3...0x0BA4,
        0x0BA8...0x0BAA,
        0x0BAE...0x0BB5,
        0x0BB7...0x0BB9,
        0x0BBE...0x0BC2,
        0x0BC6...0x0BC8,
        0x0BCA...0x0BCD,
        // Telugu
        0x0C01...0x0C03,
        0x0C05...0x0C0C,
        0x0C0E...0x0C10,
        0x0C12...0x0C28,
        0x0C2A...0x0C33,
        0x0C35...0x0C39,
        0x0C3E...0x0C44,
        0x0C46...0x0C48,
        0x0C4A...0x0C4D,
        0x0C60...0x0C61,
        // Kannada
        0x0C82...0x0C83,
        0x0C85...0x0C8C,
        0x0C8E...0x0C90,
        0x0C92...0x0CA8,
        0x0CAA...0x0CB3,
        0x0CB5...0x0CB9,
        0x0CBE...0x0CC4,
        0x0CC6...0x0CC8,
        0x0CCA...0x0CCD,
        0x0CDE,
        0x0CE0...0x0CE1,
        // Malayalam
        0x0D02...0x0D03,
        0x0D05...0x0D0C,
        0x0D0E...0x0D10,
        0x0D12...0x0D28,
        0x0D2A...0x0D39,
        0x0D3E...0x0D43,
        0x0D46...0x0D48,
        0x0D4A...0x0D4D,
        0x0D60...0x0D61,
        // Thai. The source ranges were 0x0E01...0x0E3A and 0x0E40...0x0E5B;
        // the digits 0x0E50...0x0E59 are carved out here because they are
        // listed with the other digits in stage 2.
        0x0E01...0x0E3A,
        0x0E40...0x0E4F,
        0x0E5A...0x0E5B,
        // Lao
        0x0E81...0x0E82,
        0x0E84,
        0x0E87...0x0E88,
        0x0E8A,
        0x0E8D,
        0x0E94...0x0E97,
        0x0E99...0x0E9F,
        0x0EA1...0x0EA3,
        0x0EA5,
        0x0EA7,
        0x0EAA...0x0EAB,
        0x0EAD...0x0EAE,
        0x0EB0...0x0EB9,
        0x0EBB...0x0EBD,
        0x0EC0...0x0EC4,
        0x0EC6,
        0x0EC8...0x0ECD,
        0x0EDC...0x0EDD,
        // Tibetan
        0x0F00,
        0x0F18...0x0F19,
        0x0F35,
        0x0F37,
        0x0F39,
        0x0F3E...0x0F47,
        0x0F49...0x0F69,
        0x0F71...0x0F84,
        0x0F86...0x0F8B,
        0x0F90...0x0F95,
        0x0F97,
        0x0F99...0x0FAD,
        0x0FB1...0x0FB7,
        0x0FB9,
        // Georgian
        0x10A0...0x10C5,
        0x10D0...0x10F6,
        // Hiragana
        0x3041...0x3093,
        0x309B...0x309C,
        // Katakana
        0x30A1...0x30F6,
        0x30FB...0x30FC,
        // Bopomofo
        0x3105...0x312C,
        // CJK Unified Ideographs
        0x4E00...0x9FA5,
        // Hangul
        0xAC00...0xD7A3,
        => return true,
        else => {},
    }

    // Stage 2: digits and the remaining special characters.
    return switch (codepoint) {
        // Digits
        0x0660...0x0669,
        0x06F0...0x06F9,
        0x0966...0x096F,
        0x09E6...0x09EF,
        0x0A66...0x0A6F,
        0x0AE6...0x0AEF,
        0x0B66...0x0B6F,
        0x0BE7...0x0BEF,
        0x0C66...0x0C6F,
        0x0CE6...0x0CEF,
        0x0D66...0x0D6F,
        0x0E50...0x0E59,
        0x0ED0...0x0ED9,
        0x0F20...0x0F33,
        // Special characters
        0x00B5,
        0x00B7,
        0x02B0...0x02B8,
        0x02BB,
        0x02BD...0x02C1,
        0x02D0...0x02D1,
        0x02E0...0x02E4,
        0x037A,
        0x0559,
        0x093D,
        0x0B3D,
        0x1FBE,
        0x203F...0x2040,
        0x2102,
        0x2107,
        0x210A...0x2113,
        0x2115,
        0x2118...0x211D,
        0x2124,
        0x2126,
        0x2128,
        0x212A...0x2131,
        0x2133...0x2138,
        0x2160...0x2182,
        0x3005...0x3007,
        0x3021...0x3029,
        => true,
        else => false,
    };
}
/// Reports whether `codepoint` lies in one of the C11 Standard Annex D ranges
/// of characters that may not start an identifier.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isC11DisallowedInitialIdChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);

    const in_0300_block = codepoint >= 0x0300 and codepoint <= 0x036F;
    const in_1dc0_block = codepoint >= 0x1DC0 and codepoint <= 0x1DFF;
    const in_20d0_block = codepoint >= 0x20D0 and codepoint <= 0x20FF;
    const in_fe20_block = codepoint >= 0xFE20 and codepoint <= 0xFE2F;

    return in_0300_block or in_1dc0_block or in_20d0_block or in_fe20_block;
}
/// Reports whether `codepoint` is one of the "digit" characters that C99
/// accepts inside an identifier but disallows as its first character.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isC99DisallowedInitialIDChar(codepoint: u21) bool {
    assert(codepoint > 0x7F);

    // Inclusive { first, last } bounds of every digit run.
    const digit_runs = [_][2]u21{
        .{ 0x0660, 0x0669 },
        .{ 0x06F0, 0x06F9 },
        .{ 0x0966, 0x096F },
        .{ 0x09E6, 0x09EF },
        .{ 0x0A66, 0x0A6F },
        .{ 0x0AE6, 0x0AEF },
        .{ 0x0B66, 0x0B6F },
        .{ 0x0BE7, 0x0BEF },
        .{ 0x0C66, 0x0C6F },
        .{ 0x0CE6, 0x0CEF },
        .{ 0x0D66, 0x0D6F },
        .{ 0x0E50, 0x0E59 },
        .{ 0x0ED0, 0x0ED9 },
        .{ 0x0F20, 0x0F33 },
    };

    for (digit_runs) |run| {
        if (codepoint >= run[0] and codepoint <= run[1]) return true;
    }
    return false;
}
/// Reports whether `codepoint` is one of a fixed list of characters (soft
/// hyphen, zero-width spaces and joiners, invisible operators) named below.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isInvisible(codepoint: u21) bool {
    assert(codepoint > 0x7F);
    switch (codepoint) {
        // SOFT HYPHEN
        0x00AD,
        // ZERO WIDTH SPACE, ZERO WIDTH NON-JOINER, ZERO WIDTH JOINER
        0x200B...0x200D,
        // WORD JOINER, FUNCTION APPLICATION, INVISIBLE TIMES,
        // INVISIBLE SEPARATOR, INVISIBLE PLUS
        0x2060...0x2064,
        // ZERO WIDTH NO-BREAK SPACE
        0xFEFF,
        => return true,
        else => return false,
    }
}
/// Checks for identifier characters which resemble non-identifier characters.
///
/// Returns the ASCII character that `codepoint` looks like, or `null` when
/// `codepoint` is not in the table. Characters from the basic character set
/// are not handled here: the caller must pass a codepoint above 0x7F
/// (enforced with an assertion).
pub fn homoglyph(codepoint: u21) ?u21 {
    assert(codepoint > 0x7F);
    switch (codepoint) {
        0x01C3 => return '!', // LATIN LETTER RETROFLEX CLICK
        0x037E => return ';', // GREEK QUESTION MARK
        0x2212 => return '-', // MINUS SIGN
        0x2215 => return '/', // DIVISION SLASH
        0x2216 => return '\\', // SET MINUS
        0x2217 => return '*', // ASTERISK OPERATOR
        0x2223 => return '|', // DIVIDES
        0x2227 => return '^', // LOGICAL AND
        0x2236 => return ':', // RATIO
        0x223C => return '~', // TILDE OPERATOR
        0xA789 => return ':', // MODIFIER LETTER COLON

        // Fullwidth punctuation. Every form listed here sits exactly 0xFEE0
        // above the ASCII character it imitates, so the result is computed
        // rather than spelled out. Fullwidth quotation mark, apostrophe,
        // digits, letters, low line and grave accent are deliberately absent.
        0xFF01, // FULLWIDTH EXCLAMATION MARK
        0xFF03...0xFF06, // FULLWIDTH NUMBER SIGN through FULLWIDTH AMPERSAND
        0xFF08...0xFF0F, // FULLWIDTH LEFT PARENTHESIS through FULLWIDTH SOLIDUS (includes FULLWIDTH PLUS SIGN)
        0xFF1A...0xFF20, // FULLWIDTH COLON through FULLWIDTH COMMERCIAL AT
        0xFF3B...0xFF3E, // FULLWIDTH LEFT SQUARE BRACKET through FULLWIDTH CIRCUMFLEX ACCENT
        0xFF5B...0xFF5E, // FULLWIDTH LEFT CURLY BRACKET through FULLWIDTH TILDE
        => return codepoint - 0xFEE0,

        else => return null,
    }
}
/// Looks `c` up in the bit-packed identifier tables using the `trie_start`
/// index and returns whether its bit is set.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isXidStart(c: u21) bool {
    assert(c > 0x7F);

    // Each leaf byte holds the bits for eight consecutive codepoints.
    const byte_index = c / 8;
    const trie_index = byte_index / tables.chunk;

    // Codepoints beyond the end of the trie fall back to leaf chunk 0
    // (presumably an all-zero chunk; verify against identifier_tables.zig).
    const leaf_chunk: usize = if (trie_index < tables.trie_start.len)
        tables.trie_start[trie_index]
    else
        0;

    const leaf_index = leaf_chunk * tables.chunk / 2 + byte_index % tables.chunk;
    const bit: u3 = @intCast(c % 8);
    return (tables.leaf[leaf_index] >> bit) & 1 != 0;
}
/// Looks `c` up in the bit-packed identifier tables using the `trie_continue`
/// index and returns whether its bit is set.
///
/// Characters from the basic character set are not handled here: the caller
/// must pass a codepoint above 0x7F (enforced with an assertion).
pub fn isXidContinue(c: u21) bool {
    assert(c > 0x7F);

    // Each leaf byte holds the bits for eight consecutive codepoints.
    const byte_index = c / 8;
    const trie_index = byte_index / tables.chunk;

    // Codepoints beyond the end of the trie fall back to leaf chunk 0
    // (presumably an all-zero chunk; verify against identifier_tables.zig).
    const leaf_chunk: usize = if (trie_index < tables.trie_continue.len)
        tables.trie_continue[trie_index]
    else
        0;

    const leaf_index = leaf_chunk * tables.chunk / 2 + byte_index % tables.chunk;
    const bit: u3 = @intCast(c % 8);
    return (tables.leaf[leaf_index] >> bit) & 1 != 0;
}
test "isXidStart / isXidContinue panic check" {
    const std = @import("std");
    // Run both lookups over every value from 0x80 up to 0x10FFFF that
    // `std.unicode.utf8ValidCodepoint` accepts. The results are discarded:
    // the only expectation is that no lookup trips a safety check.
    for (0x80..0x110000) |value| {
        const codepoint: u21 = @intCast(value);
        if (!std.unicode.utf8ValidCodepoint(codepoint)) continue;
        _ = isXidStart(codepoint);
        _ = isXidContinue(codepoint);
    }
}
test isXidStart {
    const std = @import("std");
    // The literals are written as escapes so that they survive the file being
    // re-encoded; a mis-decoded literal becomes several codepoints and no
    // longer compiles as a character literal.
    try std.testing.expect(!isXidStart('\u{1811}')); // ᠑ MONGOLIAN DIGIT ONE
    try std.testing.expect(!isXidStart('\u{2122}')); // ™ TRADE MARK SIGN
    try std.testing.expect(!isXidStart('\u{a3}')); // £ POUND SIGN
    try std.testing.expect(!isXidStart('\u{1f914}')); // 🤔
}
test isXidContinue {
    const std = @import("std");
    // The literals are written as escapes so that they survive the file being
    // re-encoded; a mis-decoded literal becomes several codepoints and no
    // longer compiles as a character literal.
    try std.testing.expect(isXidContinue('\u{1811}')); // ᠑ MONGOLIAN DIGIT ONE
    try std.testing.expect(!isXidContinue('\u{2122}')); // ™ TRADE MARK SIGN
    try std.testing.expect(!isXidContinue('\u{a3}')); // £ POUND SIGN
    try std.testing.expect(!isXidContinue('\u{1f914}')); // 🤔
}
/// Three-way answer returned by `isNormalized`.
/// NOTE(review): the name suggests the Unicode NFC_Quick_Check property;
/// confirm against the generator of these tables.
pub const NfcQuickCheck = enum {
    no,
    maybe,
    yes,
};

/// Classifies `codepoint` as `.no`, `.maybe` or `.yes` using the two fixed
/// tables below; anything listed in neither table is `.yes`.
///
/// Unlike the identifier predicates in this file, this function has no
/// assertion on its input and accepts any `u21`.
pub fn isNormalized(codepoint: u21) NfcQuickCheck {
    switch (codepoint) {
        0x0340...0x0341,
        0x0343...0x0344,
        0x0374,
        0x037E,
        0x0387,
        0x0958...0x095F,
        0x09DC...0x09DD,
        0x09DF,
        0x0A33,
        0x0A36,
        0x0A59...0x0A5B,
        0x0A5E,
        0x0B5C...0x0B5D,
        0x0F43,
        0x0F4D,
        0x0F52,
        0x0F57,
        0x0F5C,
        0x0F69,
        0x0F73,
        0x0F75...0x0F76,
        0x0F78,
        0x0F81,
        0x0F93,
        0x0F9D,
        0x0FA2,
        0x0FA7,
        0x0FAC,
        0x0FB9,
        0x1F71,
        0x1F73,
        0x1F75,
        0x1F77,
        0x1F79,
        0x1F7B,
        0x1F7D,
        0x1FBB,
        0x1FBE,
        0x1FC9,
        0x1FCB,
        0x1FD3,
        0x1FDB,
        0x1FE3,
        0x1FEB,
        0x1FEE...0x1FEF,
        0x1FF9,
        0x1FFB,
        0x1FFD,
        0x2000...0x2001,
        0x2126,
        0x212A...0x212B,
        0x2329...0x232A,
        0x2ADC,
        0xF900...0xFA0D,
        0xFA10,
        0xFA12,
        0xFA15...0xFA1E,
        0xFA20,
        0xFA22,
        0xFA25...0xFA26,
        0xFA2A...0xFA6D,
        0xFA70...0xFAD9,
        0xFB1D,
        0xFB1F,
        0xFB2A...0xFB36,
        0xFB38...0xFB3C,
        0xFB3E,
        0xFB40...0xFB41,
        0xFB43...0xFB44,
        0xFB46...0xFB4E,
        0x1D15E...0x1D164,
        0x1D1BB...0x1D1C0,
        0x2F800...0x2FA1D,
        => return .no,

        0x0300...0x0304,
        0x0306...0x030C,
        0x030F,
        0x0311,
        0x0313...0x0314,
        0x031B,
        0x0323...0x0328,
        0x032D...0x032E,
        0x0330...0x0331,
        0x0338,
        0x0342,
        0x0345,
        0x0653...0x0655,
        0x093C,
        0x09BE,
        0x09D7,
        0x0B3E,
        0x0B56...0x0B57,
        0x0BBE,
        0x0BD7,
        0x0C56,
        0x0CC2,
        0x0CD5...0x0CD6,
        0x0D3E,
        0x0D57,
        0x0DCA,
        0x0DCF,
        0x0DDF,
        0x102E,
        0x1161...0x1175,
        0x11A8...0x11C2,
        0x1B35,
        0x3099...0x309A,
        0x110BA,
        0x11127,
        0x1133E,
        0x11357,
        0x114B0,
        0x114BA,
        0x114BD,
        0x115AF,
        => return .maybe,

        else => return .yes,
    }
}
/// Combining class reported by `getCanonicalClass`, stored as a `u8`.
/// NOTE(review): the tag values look like Unicode Canonical_Combining_Class
/// numbers, with `cccNN` used where no descriptive name is given; confirm
/// against the Unicode data these were generated from.
pub const CanonicalCombiningClass = enum(u8) {
    not_reordered = 0,
    overlay = 1,
    han_reading = 6,
    nukta = 7,
    kana_voicing = 8,
    virama = 9,
    ccc10 = 10,
    ccc11 = 11,
    ccc12 = 12,
    ccc13 = 13,
    ccc14 = 14,
    ccc15 = 15,
    ccc16 = 16,
    ccc17 = 17,
    ccc18 = 18,
    ccc19 = 19,
    ccc20 = 20,
    ccc21 = 21,
    ccc22 = 22,
    ccc23 = 23,
    ccc24 = 24,
    ccc25 = 25,
    ccc26 = 26,
    ccc27 = 27,
    ccc28 = 28,
    ccc29 = 29,
    ccc30 = 30,
    ccc31 = 31,
    ccc32 = 32,
    ccc33 = 33,
    ccc34 = 34,
    ccc35 = 35,
    ccc36 = 36,
    ccc84 = 84,
    ccc91 = 91,
    ccc103 = 103,
    ccc107 = 107,
    ccc118 = 118,
    ccc122 = 122,
    ccc129 = 129,
    ccc130 = 130,
    ccc132 = 132,
    attached_below = 202,
    attached_above = 214,
    attached_above_right = 216,
    below_left = 218,
    below = 220,
    below_right = 222,
    left = 224,
    right = 226,
    above_left = 228,
    above = 230,
    above_right = 232,
    double_below = 233,
    double_above = 234,
    iota_subscript = 240,
};

/// Returns the combining class recorded for `codepoint` in the table below,
/// or `.not_reordered` for every codepoint the table does not list.
///
/// Unlike the identifier predicates in this file, this function has no
/// assertion on its input and accepts any `u21`.
pub fn getCanonicalClass(codepoint: u21) CanonicalCombiningClass {
    const class: CanonicalCombiningClass = switch (codepoint) {
        0x300...0x314 => .above,
        0x315 => .above_right,
        0x316...0x319 => .below,
        0x31A => .above_right,
        0x31B => .attached_above_right,
        0x31C...0x320 => .below,
        0x321...0x322 => .attached_below,
        0x323...0x326 => .below,
        0x327...0x328 => .attached_below,
        0x329...0x333 => .below,
        0x334...0x338 => .overlay,
        0x339...0x33C => .below,
        0x33D...0x344 => .above,
        0x345 => .iota_subscript,
        0x346 => .above,
        0x347...0x349 => .below,
        0x34A...0x34C => .above,
        0x34D...0x34E => .below,
        0x350...0x352 => .above,
        0x353...0x356 => .below,
        0x357 => .above,
        0x358 => .above_right,
        0x359...0x35A => .below,
        0x35B => .above,
        0x35C => .double_below,
        0x35D...0x35E => .double_above,
        0x35F => .double_below,
        0x360...0x361 => .double_above,
        0x362 => .double_below,
        0x363...0x36F => .above,
        0x483...0x487 => .above,
        0x591 => .below,
        0x592...0x595 => .above,
        0x596 => .below,
        0x597...0x599 => .above,
        0x59A => .below_right,
        0x59B => .below,
        0x59C...0x5A1 => .above,
        0x5A2...0x5A7 => .below,
        0x5A8...0x5A9 => .above,
        0x5AA => .below,
        0x5AB...0x5AC => .above,
        0x5AD => .below_right,
        0x5AE => .above_left,
        0x5AF => .above,
        0x5B0 => .ccc10,
        0x5B1 => .ccc11,
        0x5B2 => .ccc12,
        0x5B3 => .ccc13,
        0x5B4 => .ccc14,
        0x5B5 => .ccc15,
        0x5B6 => .ccc16,
        0x5B7 => .ccc17,
        0x5B8 => .ccc18,
        0x5B9...0x5BA => .ccc19,
        0x5BB => .ccc20,
        0x5BC => .ccc21,
        0x5BD => .ccc22,
        0x5BF => .ccc23,
        0x5C1 => .ccc24,
        0x5C2 => .ccc25,
        0x5C4 => .above,
        0x5C5 => .below,
        0x5C7 => .ccc18,
        0x610...0x617 => .above,
        0x618 => .ccc30,
        0x619 => .ccc31,
        0x61A => .ccc32,
        0x64B => .ccc27,
        0x64C => .ccc28,
        0x64D => .ccc29,
        0x64E => .ccc30,
        0x64F => .ccc31,
        0x650 => .ccc32,
        0x651 => .ccc33,
        0x652 => .ccc34,
        0x653...0x654 => .above,
        0x655...0x656 => .below,
        0x657...0x65B => .above,
        0x65C => .below,
        0x65D...0x65E => .above,
        0x65F => .below,
        0x670 => .ccc35,
        0x6D6...0x6DC => .above,
        0x6DF...0x6E2 => .above,
        0x6E3 => .below,
        0x6E4 => .above,
        0x6E7...0x6E8 => .above,
        0x6EA => .below,
        0x6EB...0x6EC => .above,
        0x6ED => .below,
        0x711 => .ccc36,
        0x730 => .above,
        0x731 => .below,
        0x732...0x733 => .above,
        0x734 => .below,
        0x735...0x736 => .above,
        0x737...0x739 => .below,
        0x73A => .above,
        0x73B...0x73C => .below,
        0x73D => .above,
        0x73E => .below,
        0x73F...0x741 => .above,
        0x742 => .below,
        0x743 => .above,
        0x744 => .below,
        0x745 => .above,
        0x746 => .below,
        0x747 => .above,
        0x748 => .below,
        0x749...0x74A => .above,
        0x7EB...0x7F1 => .above,
        0x7F2 => .below,
        0x7F3 => .above,
        0x7FD => .below,
        0x816...0x819 => .above,
        0x81B...0x823 => .above,
        0x825...0x827 => .above,
        0x829...0x82D => .above,
        0x859...0x85B => .below,
        0x898 => .above,
        0x899...0x89B => .below,
        0x89C...0x89F => .above,
        0x8CA...0x8CE => .above,
        0x8CF...0x8D3 => .below,
        0x8D4...0x8E1 => .above,
        0x8E3 => .below,
        0x8E4...0x8E5 => .above,
        0x8E6 => .below,
        0x8E7...0x8E8 => .above,
        0x8E9 => .below,
        0x8EA...0x8EC => .above,
        0x8ED...0x8EF => .below,
        0x8F0 => .ccc27,
        0x8F1 => .ccc28,
        0x8F2 => .ccc29,
        0x8F3...0x8F5 => .above,
        0x8F6 => .below,
        0x8F7...0x8F8 => .above,
        0x8F9...0x8FA => .below,
        0x8FB...0x8FF => .above,
        0x93C => .nukta,
        0x94D => .virama,
        0x951 => .above,
        0x952 => .below,
        0x953...0x954 => .above,
        0x9BC => .nukta,
        0x9CD => .virama,
        0x9FE => .above,
        0xA3C => .nukta,
        0xA4D => .virama,
        0xABC => .nukta,
        0xACD => .virama,
        0xB3C => .nukta,
        0xB4D => .virama,
        0xBCD => .virama,
        0xC3C => .nukta,
        0xC4D => .virama,
        0xC55 => .ccc84,
        0xC56 => .ccc91,
        0xCBC => .nukta,
        0xCCD => .virama,
        0xD3B...0xD3C => .virama,
        0xD4D => .virama,
        0xDCA => .virama,
        0xE38...0xE39 => .ccc103,
        0xE3A => .virama,
        0xE48...0xE4B => .ccc107,
        0xEB8...0xEB9 => .ccc118,
        0xEBA => .virama,
        0xEC8...0xECB => .ccc122,
        0xF18...0xF19 => .below,
        0xF35 => .below,
        0xF37 => .below,
        0xF39 => .attached_above_right,
        0xF71 => .ccc129,
        0xF72 => .ccc130,
        0xF74 => .ccc132,
        0xF7A...0xF7D => .ccc130,
        0xF80 => .ccc130,
        0xF82...0xF83 => .above,
        0xF84 => .virama,
        0xF86...0xF87 => .above,
        0xFC6 => .below,
        0x1037 => .nukta,
        0x1039...0x103A => .virama,
        0x108D => .below,
        0x135D...0x135F => .above,
        0x1714...0x1715 => .virama,
        0x1734 => .virama,
        0x17D2 => .virama,
        0x17DD => .above,
        0x18A9 => .above_left,
        0x1939 => .below_right,
        0x193A => .above,
        0x193B => .below,
        0x1A17 => .above,
        0x1A18 => .below,
        0x1A60 => .virama,
        0x1A75...0x1A7C => .above,
        0x1A7F => .below,
        0x1AB0...0x1AB4 => .above,
        0x1AB5...0x1ABA => .below,
        0x1ABB...0x1ABC => .above,
        0x1ABD => .below,
        0x1ABF...0x1AC0 => .below,
        0x1AC1...0x1AC2 => .above,
        0x1AC3...0x1AC4 => .below,
        0x1AC5...0x1AC9 => .above,
        0x1ACA => .below,
        0x1ACB...0x1ACE => .above,
        0x1B34 => .nukta,
        0x1B44 => .virama,
        0x1B6B => .above,
        0x1B6C => .below,
        0x1B6D...0x1B73 => .above,
        0x1BAA...0x1BAB => .virama,
        0x1BE6 => .nukta,
        0x1BF2...0x1BF3 => .virama,
        0x1C37 => .nukta,
        0x1CD0...0x1CD2 => .above,
        0x1CD4 => .overlay,
        0x1CD5...0x1CD9 => .below,
        0x1CDA...0x1CDB => .above,
        0x1CDC...0x1CDF => .below,
        0x1CE0 => .above,
        0x1CE2...0x1CE8 => .overlay,
        0x1CED => .below,
        0x1CF4 => .above,
        0x1CF8...0x1CF9 => .above,
        0x1DC0...0x1DC1 => .above,
        0x1DC2 => .below,
        0x1DC3...0x1DC9 => .above,
        0x1DCA => .below,
        0x1DCB...0x1DCC => .above,
        0x1DCD => .double_above,
        0x1DCE => .attached_above,
        0x1DCF => .below,
        0x1DD0 => .attached_below,
        0x1DD1...0x1DF5 => .above,
        0x1DF6 => .above_right,
        0x1DF7...0x1DF8 => .above_left,
        0x1DF9 => .below,
        0x1DFA => .below_left,
        0x1DFB => .above,
        0x1DFC => .double_below,
        0x1DFD => .below,
        0x1DFE => .above,
        0x1DFF => .below,
        0x20D0...0x20D1 => .above,
        0x20D2...0x20D3 => .overlay,
        0x20D4...0x20D7 => .above,
        0x20D8...0x20DA => .overlay,
        0x20DB...0x20DC => .above,
        0x20E1 => .above,
        0x20E5...0x20E6 => .overlay,
        0x20E7 => .above,
        0x20E8 => .below,
        0x20E9 => .above,
        0x20EA...0x20EB => .overlay,
        0x20EC...0x20EF => .below,
        0x20F0 => .above,
        0x2CEF...0x2CF1 => .above,
        0x2D7F => .virama,
        0x2DE0...0x2DFF => .above,
        0x302A => .below_left,
        0x302B => .above_left,
        0x302C => .above_right,
        0x302D => .below_right,
        0x302E...0x302F => .left,
        0x3099...0x309A => .kana_voicing,
        0xA66F => .above,
        0xA674...0xA67D => .above,
        0xA69E...0xA69F => .above,
        0xA6F0...0xA6F1 => .above,
        0xA806 => .virama,
        0xA82C => .virama,
        0xA8C4 => .virama,
        0xA8E0...0xA8F1 => .above,
        0xA92B...0xA92D => .below,
        0xA953 => .virama,
        0xA9B3 => .nukta,
        0xA9C0 => .virama,
        0xAAB0 => .above,
        0xAAB2...0xAAB3 => .above,
        0xAAB4 => .below,
        0xAAB7...0xAAB8 => .above,
        0xAABE...0xAABF => .above,
        0xAAC1 => .above,
        0xAAF6 => .virama,
        0xABED => .virama,
        0xFB1E => .ccc26,
        0xFE20...0xFE26 => .above,
        0xFE27...0xFE2D => .below,
        0xFE2E...0xFE2F => .above,
        0x101FD => .below,
        0x102E0 => .below,
        0x10376...0x1037A => .above,
        0x10A0D => .below,
        0x10A0F => .above,
        0x10A38 => .above,
        0x10A39 => .overlay,
        0x10A3A => .below,
        0x10A3F => .virama,
        0x10AE5 => .above,
        0x10AE6 => .below,
        0x10D24...0x10D27 => .above,
        0x10EAB...0x10EAC => .above,
        0x10EFD...0x10EFF => .below,
        0x10F46...0x10F47 => .below,
        0x10F48...0x10F4A => .above,
        0x10F4B => .below,
        0x10F4C => .above,
        0x10F4D...0x10F50 => .below,
        0x10F82 => .above,
        0x10F83 => .below,
        0x10F84 => .above,
        0x10F85 => .below,
        0x11046 => .virama,
        0x11070 => .virama,
        0x1107F => .virama,
        0x110B9 => .virama,
        0x110BA => .nukta,
        0x11100...0x11102 => .above,
        0x11133...0x11134 => .virama,
        0x11173 => .nukta,
        0x111C0 => .virama,
        0x111CA => .nukta,
        0x11235 => .virama,
        0x11236 => .nukta,
        0x112E9 => .nukta,
        0x112EA => .virama,
        0x1133B...0x1133C => .nukta,
        0x1134D => .virama,
        0x11366...0x1136C => .above,
        0x11370...0x11374 => .above,
        0x11442 => .virama,
        0x11446 => .nukta,
        0x1145E => .above,
        0x114C2 => .virama,
        0x114C3 => .nukta,
        0x115BF => .virama,
        0x115C0 => .nukta,
        0x1163F => .virama,
        0x116B6 => .virama,
        0x116B7 => .nukta,
        0x1172B => .virama,
        0x11839 => .virama,
        0x1183A => .nukta,
        0x1193D...0x1193E => .virama,
        0x11943 => .nukta,
        0x119E0 => .virama,
        0x11A34 => .virama,
        0x11A47 => .virama,
        0x11A99 => .virama,
        0x11C3F => .virama,
        0x11D42 => .nukta,
        0x11D44...0x11D45 => .virama,
        0x11D97 => .virama,
        0x11F41...0x11F42 => .virama,
        0x16AF0...0x16AF4 => .overlay,
        0x16B30...0x16B36 => .above,
        0x16FF0...0x16FF1 => .han_reading,
        0x1BC9E => .overlay,
        0x1D165...0x1D166 => .attached_above_right,
        0x1D167...0x1D169 => .overlay,
        0x1D16D => .right,
        0x1D16E...0x1D172 => .attached_above_right,
        0x1D17B...0x1D182 => .below,
        0x1D185...0x1D189 => .above,
        0x1D18A...0x1D18B => .below,
        0x1D1AA...0x1D1AD => .above,
        0x1D242...0x1D244 => .above,
        0x1E000...0x1E006 => .above,
        0x1E008...0x1E018 => .above,
        0x1E01B...0x1E021 => .above,
        0x1E023...0x1E024 => .above,
        0x1E026...0x1E02A => .above,
        0x1E08F => .above,
        0x1E130...0x1E136 => .above,
        0x1E2AE => .above,
        0x1E2EC...0x1E2EF => .above,
        0x1E4EC...0x1E4ED => .above_right,
        0x1E4EE => .below,
        0x1E4EF => .above,
        0x1E8D0...0x1E8D6 => .below,
        0x1E944...0x1E949 => .above,
        0x1E94A => .nukta,
        else => .not_reordered,
    };
    return class;
}