mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 14:23:09 +00:00
This moves .rc/.manifest compilation out of the main Zig binary, contributing towards #19063. Also: make resinator use Aro as its preprocessor instead of clang, and sync resinator with upstream.
589 lines
31 KiB
Zig
589 lines
31 KiB
Zig
const std = @import("std");
|
|
|
|
/// Reads Windows-1252 encoded bytes from `reader` until the end of the stream
/// and writes the UTF-8 encoding of each one to `writer`.
/// Returns the number of UTF-8 bytes written to the writer.
/// Read errors other than `error.EndOfStream`, and all write errors, are
/// returned to the caller.
pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize {
    var bytes_written: usize = 0;
    // `toCodepoint` returns a `u16`, so one codepoint needs at most 3 UTF-8 bytes.
    var utf8_buf: [3]u8 = undefined;
    while (true) {
        const c = reader.readByte() catch |err| switch (err) {
            // End of input is the normal way out of the loop.
            error.EndOfStream => return bytes_written,
            else => |e| return e,
        };
        const codepoint = toCodepoint(c);
        if (codepoint <= 0x7F) {
            // ASCII: the input byte is already its own UTF-8 encoding.
            try writer.writeByte(c);
            bytes_written += 1;
        } else {
            // `toCodepoint` only yields entries from its table or the byte
            // itself, none of which are surrogates, so encoding into the
            // 3-byte buffer is expected to always succeed.
            const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch unreachable;
            try writer.writeAll(utf8_buf[0..utf8_len]);
            bytes_written += utf8_len;
        }
    }
}
|
|
|
|
/// Converts a Windows-1252 encoded string into a newly allocated,
/// 0-terminated slice of UTF-16 code units (one code unit per input byte).
/// Caller owns the returned slice and must free it with `allocator`.
/// Fails only if the allocation fails.
pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 {
    // Guaranteed to need exactly the same number of code units as Windows-1252 bytes,
    // since `toCodepoint` returns a single `u16` for every byte.
    const utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0);
    // Nothing below can fail, so no `errdefer` is needed for the allocation.
    for (utf16_slice, win1252_str) |*code_unit, c| {
        code_unit.* = toCodepoint(c);
    }
    return utf16_slice;
}
|
|
|
|
/// Maps a single Windows-1252 byte to its Unicode codepoint.
/// Bytes listed in the table below (all within 0x80...0x9F) are translated to
/// their Unicode equivalents; every other byte, including 0x81, 0x8D, 0x8F,
/// 0x90 and 0x9D which have no table entry, is returned unchanged.
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
pub fn toCodepoint(c: u8) u16 {
    return switch (c) {
        0x80 => 0x20ac, // Euro Sign
        0x82 => 0x201a, // Single Low-9 Quotation Mark
        0x83 => 0x0192, // Latin Small Letter F With Hook
        0x84 => 0x201e, // Double Low-9 Quotation Mark
        0x85 => 0x2026, // Horizontal Ellipsis
        0x86 => 0x2020, // Dagger
        0x87 => 0x2021, // Double Dagger
        0x88 => 0x02c6, // Modifier Letter Circumflex Accent
        0x89 => 0x2030, // Per Mille Sign
        0x8a => 0x0160, // Latin Capital Letter S With Caron
        0x8b => 0x2039, // Single Left-Pointing Angle Quotation Mark
        0x8c => 0x0152, // Latin Capital Ligature Oe
        0x8e => 0x017d, // Latin Capital Letter Z With Caron
        0x91 => 0x2018, // Left Single Quotation Mark
        0x92 => 0x2019, // Right Single Quotation Mark
        0x93 => 0x201c, // Left Double Quotation Mark
        0x94 => 0x201d, // Right Double Quotation Mark
        0x95 => 0x2022, // Bullet
        0x96 => 0x2013, // En Dash
        0x97 => 0x2014, // Em Dash
        0x98 => 0x02dc, // Small Tilde
        0x99 => 0x2122, // Trade Mark Sign
        0x9a => 0x0161, // Latin Small Letter S With Caron
        0x9b => 0x203a, // Single Right-Pointing Angle Quotation Mark
        0x9c => 0x0153, // Latin Small Ligature Oe
        0x9e => 0x017e, // Latin Small Letter Z With Caron
        0x9f => 0x0178, // Latin Capital Letter Y With Diaeresis
        // Everything else keeps its byte value as the codepoint.
        else => c,
    };
}
|
|
|
|
/// Returns the Windows-1252 byte that is the "best fit" for the given Unicode
/// codepoint, or `null` if the codepoint has no mapping.
/// Codepoints 0x00...0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D and 0xA0...0xFF map to
/// the byte with the same value; all other mappings come from the table below.
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
/// Plus some mappings found empirically by iterating all codepoints:
///  0x2007 => 0xA0, // Figure Space
///  0x2008 => ' ', // Punctuation Space
///  0x2009 => ' ', // Thin Space
///  0x200A => ' ', // Hair Space
///  0x2012 => '-', // Figure Dash
///  0x2015 => '-', // Horizontal Bar
///  0x201B => '\'', // Single High-reversed-9 Quotation Mark
///  0x201F => '"', // Double High-reversed-9 Quotation Mark
///  0x202F => 0xA0, // Narrow No-Break Space
///  0x2033 => '"', // Double Prime
///  0x2036 => '"', // Reversed Double Prime
pub fn bestFitFromCodepoint(codepoint: u21) ?u8 {
    return switch (codepoint) {
        // Identity mappings: the codepoint value is the Windows-1252 byte.
        0x00...0x7F,
        0x81,
        0x8D,
        0x8F,
        0x90,
        0x9D,
        0xA0...0xFF,
        => @intCast(codepoint),
        0x0100 => 0x41, // Latin Capital Letter A With Macron
        0x0101 => 0x61, // Latin Small Letter A With Macron
        0x0102 => 0x41, // Latin Capital Letter A With Breve
        0x0103 => 0x61, // Latin Small Letter A With Breve
        0x0104 => 0x41, // Latin Capital Letter A With Ogonek
        0x0105 => 0x61, // Latin Small Letter A With Ogonek
        0x0106 => 0x43, // Latin Capital Letter C With Acute
        0x0107 => 0x63, // Latin Small Letter C With Acute
        0x0108 => 0x43, // Latin Capital Letter C With Circumflex
        0x0109 => 0x63, // Latin Small Letter C With Circumflex
        0x010a => 0x43, // Latin Capital Letter C With Dot Above
        0x010b => 0x63, // Latin Small Letter C With Dot Above
        0x010c => 0x43, // Latin Capital Letter C With Caron
        0x010d => 0x63, // Latin Small Letter C With Caron
        0x010e => 0x44, // Latin Capital Letter D With Caron
        0x010f => 0x64, // Latin Small Letter D With Caron
        0x0110 => 0xd0, // Latin Capital Letter D With Stroke
        0x0111 => 0x64, // Latin Small Letter D With Stroke
        0x0112 => 0x45, // Latin Capital Letter E With Macron
        0x0113 => 0x65, // Latin Small Letter E With Macron
        0x0114 => 0x45, // Latin Capital Letter E With Breve
        0x0115 => 0x65, // Latin Small Letter E With Breve
        0x0116 => 0x45, // Latin Capital Letter E With Dot Above
        0x0117 => 0x65, // Latin Small Letter E With Dot Above
        0x0118 => 0x45, // Latin Capital Letter E With Ogonek
        0x0119 => 0x65, // Latin Small Letter E With Ogonek
        0x011a => 0x45, // Latin Capital Letter E With Caron
        0x011b => 0x65, // Latin Small Letter E With Caron
        0x011c => 0x47, // Latin Capital Letter G With Circumflex
        0x011d => 0x67, // Latin Small Letter G With Circumflex
        0x011e => 0x47, // Latin Capital Letter G With Breve
        0x011f => 0x67, // Latin Small Letter G With Breve
        0x0120 => 0x47, // Latin Capital Letter G With Dot Above
        0x0121 => 0x67, // Latin Small Letter G With Dot Above
        0x0122 => 0x47, // Latin Capital Letter G With Cedilla
        0x0123 => 0x67, // Latin Small Letter G With Cedilla
        0x0124 => 0x48, // Latin Capital Letter H With Circumflex
        0x0125 => 0x68, // Latin Small Letter H With Circumflex
        0x0126 => 0x48, // Latin Capital Letter H With Stroke
        0x0127 => 0x68, // Latin Small Letter H With Stroke
        0x0128 => 0x49, // Latin Capital Letter I With Tilde
        0x0129 => 0x69, // Latin Small Letter I With Tilde
        0x012a => 0x49, // Latin Capital Letter I With Macron
        0x012b => 0x69, // Latin Small Letter I With Macron
        0x012c => 0x49, // Latin Capital Letter I With Breve
        0x012d => 0x69, // Latin Small Letter I With Breve
        0x012e => 0x49, // Latin Capital Letter I With Ogonek
        0x012f => 0x69, // Latin Small Letter I With Ogonek
        0x0130 => 0x49, // Latin Capital Letter I With Dot Above
        0x0131 => 0x69, // Latin Small Letter Dotless I
        0x0134 => 0x4a, // Latin Capital Letter J With Circumflex
        0x0135 => 0x6a, // Latin Small Letter J With Circumflex
        0x0136 => 0x4b, // Latin Capital Letter K With Cedilla
        0x0137 => 0x6b, // Latin Small Letter K With Cedilla
        0x0139 => 0x4c, // Latin Capital Letter L With Acute
        0x013a => 0x6c, // Latin Small Letter L With Acute
        0x013b => 0x4c, // Latin Capital Letter L With Cedilla
        0x013c => 0x6c, // Latin Small Letter L With Cedilla
        0x013d => 0x4c, // Latin Capital Letter L With Caron
        0x013e => 0x6c, // Latin Small Letter L With Caron
        0x0141 => 0x4c, // Latin Capital Letter L With Stroke
        0x0142 => 0x6c, // Latin Small Letter L With Stroke
        0x0143 => 0x4e, // Latin Capital Letter N With Acute
        0x0144 => 0x6e, // Latin Small Letter N With Acute
        0x0145 => 0x4e, // Latin Capital Letter N With Cedilla
        0x0146 => 0x6e, // Latin Small Letter N With Cedilla
        0x0147 => 0x4e, // Latin Capital Letter N With Caron
        0x0148 => 0x6e, // Latin Small Letter N With Caron
        0x014c => 0x4f, // Latin Capital Letter O With Macron
        0x014d => 0x6f, // Latin Small Letter O With Macron
        0x014e => 0x4f, // Latin Capital Letter O With Breve
        0x014f => 0x6f, // Latin Small Letter O With Breve
        0x0150 => 0x4f, // Latin Capital Letter O With Double Acute
        0x0151 => 0x6f, // Latin Small Letter O With Double Acute
        0x0152 => 0x8c, // Latin Capital Ligature Oe
        0x0153 => 0x9c, // Latin Small Ligature Oe
        0x0154 => 0x52, // Latin Capital Letter R With Acute
        0x0155 => 0x72, // Latin Small Letter R With Acute
        0x0156 => 0x52, // Latin Capital Letter R With Cedilla
        0x0157 => 0x72, // Latin Small Letter R With Cedilla
        0x0158 => 0x52, // Latin Capital Letter R With Caron
        0x0159 => 0x72, // Latin Small Letter R With Caron
        0x015a => 0x53, // Latin Capital Letter S With Acute
        0x015b => 0x73, // Latin Small Letter S With Acute
        0x015c => 0x53, // Latin Capital Letter S With Circumflex
        0x015d => 0x73, // Latin Small Letter S With Circumflex
        0x015e => 0x53, // Latin Capital Letter S With Cedilla
        0x015f => 0x73, // Latin Small Letter S With Cedilla
        0x0160 => 0x8a, // Latin Capital Letter S With Caron
        0x0161 => 0x9a, // Latin Small Letter S With Caron
        0x0162 => 0x54, // Latin Capital Letter T With Cedilla
        0x0163 => 0x74, // Latin Small Letter T With Cedilla
        0x0164 => 0x54, // Latin Capital Letter T With Caron
        0x0165 => 0x74, // Latin Small Letter T With Caron
        0x0166 => 0x54, // Latin Capital Letter T With Stroke
        0x0167 => 0x74, // Latin Small Letter T With Stroke
        0x0168 => 0x55, // Latin Capital Letter U With Tilde
        0x0169 => 0x75, // Latin Small Letter U With Tilde
        0x016a => 0x55, // Latin Capital Letter U With Macron
        0x016b => 0x75, // Latin Small Letter U With Macron
        0x016c => 0x55, // Latin Capital Letter U With Breve
        0x016d => 0x75, // Latin Small Letter U With Breve
        0x016e => 0x55, // Latin Capital Letter U With Ring Above
        0x016f => 0x75, // Latin Small Letter U With Ring Above
        0x0170 => 0x55, // Latin Capital Letter U With Double Acute
        0x0171 => 0x75, // Latin Small Letter U With Double Acute
        0x0172 => 0x55, // Latin Capital Letter U With Ogonek
        0x0173 => 0x75, // Latin Small Letter U With Ogonek
        0x0174 => 0x57, // Latin Capital Letter W With Circumflex
        0x0175 => 0x77, // Latin Small Letter W With Circumflex
        0x0176 => 0x59, // Latin Capital Letter Y With Circumflex
        0x0177 => 0x79, // Latin Small Letter Y With Circumflex
        0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis
        0x0179 => 0x5a, // Latin Capital Letter Z With Acute
        0x017a => 0x7a, // Latin Small Letter Z With Acute
        0x017b => 0x5a, // Latin Capital Letter Z With Dot Above
        0x017c => 0x7a, // Latin Small Letter Z With Dot Above
        0x017d => 0x8e, // Latin Capital Letter Z With Caron
        0x017e => 0x9e, // Latin Small Letter Z With Caron
        0x0180 => 0x62, // Latin Small Letter B With Stroke
        0x0189 => 0xd0, // Latin Capital Letter African D
        0x0191 => 0x83, // Latin Capital Letter F With Hook
        0x0192 => 0x83, // Latin Small Letter F With Hook
        0x0197 => 0x49, // Latin Capital Letter I With Stroke
        0x019a => 0x6c, // Latin Small Letter L With Bar
        0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde
        0x01a0 => 0x4f, // Latin Capital Letter O With Horn
        0x01a1 => 0x6f, // Latin Small Letter O With Horn
        0x01ab => 0x74, // Latin Small Letter T With Palatal Hook
        0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook
        0x01af => 0x55, // Latin Capital Letter U With Horn
        0x01b0 => 0x75, // Latin Small Letter U With Horn
        0x01b6 => 0x7a, // Latin Small Letter Z With Stroke
        0x01c0 => 0x7c, // Latin Letter Dental Click
        0x01c3 => 0x21, // Latin Letter Retroflex Click
        0x01cd => 0x41, // Latin Capital Letter A With Caron
        0x01ce => 0x61, // Latin Small Letter A With Caron
        0x01cf => 0x49, // Latin Capital Letter I With Caron
        0x01d0 => 0x69, // Latin Small Letter I With Caron
        0x01d1 => 0x4f, // Latin Capital Letter O With Caron
        0x01d2 => 0x6f, // Latin Small Letter O With Caron
        0x01d3 => 0x55, // Latin Capital Letter U With Caron
        0x01d4 => 0x75, // Latin Small Letter U With Caron
        0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron
        0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron
        0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute
        0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute
        0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron
        0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron
        0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave
        0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave
        0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron
        0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron
        0x01e4 => 0x47, // Latin Capital Letter G With Stroke
        0x01e5 => 0x67, // Latin Small Letter G With Stroke
        0x01e6 => 0x47, // Latin Capital Letter G With Caron
        0x01e7 => 0x67, // Latin Small Letter G With Caron
        0x01e8 => 0x4b, // Latin Capital Letter K With Caron
        0x01e9 => 0x6b, // Latin Small Letter K With Caron
        0x01ea => 0x4f, // Latin Capital Letter O With Ogonek
        0x01eb => 0x6f, // Latin Small Letter O With Ogonek
        0x01ec => 0x4f, // Latin Capital Letter O With Ogonek And Macron
        0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron
        0x01f0 => 0x6a, // Latin Small Letter J With Caron
        0x0261 => 0x67, // Latin Small Letter Script G
        0x02b9 => 0x27, // Modifier Letter Prime
        0x02ba => 0x22, // Modifier Letter Double Prime
        0x02bc => 0x27, // Modifier Letter Apostrophe
        0x02c4 => 0x5e, // Modifier Letter Up Arrowhead
        0x02c6 => 0x88, // Modifier Letter Circumflex Accent
        0x02c8 => 0x27, // Modifier Letter Vertical Line
        0x02c9 => 0xaf, // Modifier Letter Macron
        0x02ca => 0xb4, // Modifier Letter Acute Accent
        0x02cb => 0x60, // Modifier Letter Grave Accent
        0x02cd => 0x5f, // Modifier Letter Low Macron
        0x02da => 0xb0, // Ring Above
        0x02dc => 0x98, // Small Tilde
        0x0300 => 0x60, // Combining Grave Accent
        0x0301 => 0xb4, // Combining Acute Accent
        0x0302 => 0x5e, // Combining Circumflex Accent
        0x0303 => 0x7e, // Combining Tilde
        0x0304 => 0xaf, // Combining Macron
        0x0305 => 0xaf, // Combining Overline
        0x0308 => 0xa8, // Combining Diaeresis
        0x030a => 0xb0, // Combining Ring Above
        0x030e => 0x22, // Combining Double Vertical Line Above
        0x0327 => 0xb8, // Combining Cedilla
        0x0331 => 0x5f, // Combining Macron Below
        0x0332 => 0x5f, // Combining Low Line
        0x037e => 0x3b, // Greek Question Mark
        0x0393 => 0x47, // Greek Capital Letter Gamma
        0x0398 => 0x54, // Greek Capital Letter Theta
        0x03a3 => 0x53, // Greek Capital Letter Sigma
        0x03a6 => 0x46, // Greek Capital Letter Phi
        0x03a9 => 0x4f, // Greek Capital Letter Omega
        0x03b1 => 0x61, // Greek Small Letter Alpha
        0x03b2 => 0xdf, // Greek Small Letter Beta
        0x03b4 => 0x64, // Greek Small Letter Delta
        0x03b5 => 0x65, // Greek Small Letter Epsilon
        0x03bc => 0xb5, // Greek Small Letter Mu
        0x03c0 => 0x70, // Greek Small Letter Pi
        0x03c3 => 0x73, // Greek Small Letter Sigma
        0x03c4 => 0x74, // Greek Small Letter Tau
        0x03c6 => 0x66, // Greek Small Letter Phi
        0x04bb => 0x68, // Cyrillic Small Letter Shha
        0x0589 => 0x3a, // Armenian Full Stop
        0x066a => 0x25, // Arabic Percent Sign
        0x2000 => 0x20, // En Quad
        0x2001 => 0x20, // Em Quad
        0x2002 => 0x20, // En Space
        0x2003 => 0x20, // Em Space
        0x2004 => 0x20, // Three-Per-Em Space
        0x2005 => 0x20, // Four-Per-Em Space
        0x2006 => 0x20, // Six-Per-Em Space
        0x2010 => 0x2d, // Hyphen
        0x2011 => 0x2d, // Non-Breaking Hyphen
        0x2013 => 0x96, // En Dash
        0x2014 => 0x97, // Em Dash
        0x2017 => 0x3d, // Double Low Line
        0x2018 => 0x91, // Left Single Quotation Mark
        0x2019 => 0x92, // Right Single Quotation Mark
        0x201a => 0x82, // Single Low-9 Quotation Mark
        0x201c => 0x93, // Left Double Quotation Mark
        0x201d => 0x94, // Right Double Quotation Mark
        0x201e => 0x84, // Double Low-9 Quotation Mark
        0x2020 => 0x86, // Dagger
        0x2021 => 0x87, // Double Dagger
        0x2022 => 0x95, // Bullet
        0x2024 => 0xb7, // One Dot Leader
        0x2026 => 0x85, // Horizontal Ellipsis
        0x2030 => 0x89, // Per Mille Sign
        0x2032 => 0x27, // Prime
        0x2035 => 0x60, // Reversed Prime
        0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark
        0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark
        0x2044 => 0x2f, // Fraction Slash
        0x2070 => 0xb0, // Superscript Zero
        0x2074 => 0x34, // Superscript Four
        0x2075 => 0x35, // Superscript Five
        0x2076 => 0x36, // Superscript Six
        0x2077 => 0x37, // Superscript Seven
        0x2078 => 0x38, // Superscript Eight
        0x207f => 0x6e, // Superscript Latin Small Letter N
        0x2080 => 0x30, // Subscript Zero
        0x2081 => 0x31, // Subscript One
        0x2082 => 0x32, // Subscript Two
        0x2083 => 0x33, // Subscript Three
        0x2084 => 0x34, // Subscript Four
        0x2085 => 0x35, // Subscript Five
        0x2086 => 0x36, // Subscript Six
        0x2087 => 0x37, // Subscript Seven
        0x2088 => 0x38, // Subscript Eight
        0x2089 => 0x39, // Subscript Nine
        0x20ac => 0x80, // Euro Sign
        0x20a1 => 0xa2, // Colon Sign
        0x20a4 => 0xa3, // Lira Sign
        0x20a7 => 0x50, // Peseta Sign
        0x2102 => 0x43, // Double-Struck Capital C
        0x2107 => 0x45, // Euler Constant
        0x210a => 0x67, // Script Small G
        0x210b => 0x48, // Script Capital H
        0x210c => 0x48, // Black-Letter Capital H
        0x210d => 0x48, // Double-Struck Capital H
        0x210e => 0x68, // Planck Constant
        0x2110 => 0x49, // Script Capital I
        0x2111 => 0x49, // Black-Letter Capital I
        0x2112 => 0x4c, // Script Capital L
        0x2113 => 0x6c, // Script Small L
        0x2115 => 0x4e, // Double-Struck Capital N
        0x2118 => 0x50, // Script Capital P
        0x2119 => 0x50, // Double-Struck Capital P
        0x211a => 0x51, // Double-Struck Capital Q
        0x211b => 0x52, // Script Capital R
        0x211c => 0x52, // Black-Letter Capital R
        0x211d => 0x52, // Double-Struck Capital R
        0x2122 => 0x99, // Trade Mark Sign
        0x2124 => 0x5a, // Double-Struck Capital Z
        0x2128 => 0x5a, // Black-Letter Capital Z
        0x212a => 0x4b, // Kelvin Sign
        0x212b => 0xc5, // Angstrom Sign
        0x212c => 0x42, // Script Capital B
        0x212d => 0x43, // Black-Letter Capital C
        0x212e => 0x65, // Estimated Symbol
        0x212f => 0x65, // Script Small E
        0x2130 => 0x45, // Script Capital E
        0x2131 => 0x46, // Script Capital F
        0x2133 => 0x4d, // Script Capital M
        0x2134 => 0x6f, // Script Small O
        0x2205 => 0xd8, // Empty Set
        0x2212 => 0x2d, // Minus Sign
        0x2213 => 0xb1, // Minus-Or-Plus Sign
        0x2215 => 0x2f, // Division Slash
        0x2216 => 0x5c, // Set Minus
        0x2217 => 0x2a, // Asterisk Operator
        0x2218 => 0xb0, // Ring Operator
        0x2219 => 0xb7, // Bullet Operator
        0x221a => 0x76, // Square Root
        0x221e => 0x38, // Infinity
        0x2223 => 0x7c, // Divides
        0x2229 => 0x6e, // Intersection
        0x2236 => 0x3a, // Ratio
        0x223c => 0x7e, // Tilde Operator
        0x2248 => 0x98, // Almost Equal To
        0x2261 => 0x3d, // Identical To
        0x2264 => 0x3d, // Less-Than Or Equal To
        0x2265 => 0x3d, // Greater-Than Or Equal To
        0x226a => 0xab, // Much Less-Than
        0x226b => 0xbb, // Much Greater-Than
        0x22c5 => 0xb7, // Dot Operator
        0x2302 => 0xa6, // House
        0x2303 => 0x5e, // Up Arrowhead
        0x2310 => 0xac, // Reversed Not Sign
        0x2320 => 0x28, // Top Half Integral
        0x2321 => 0x29, // Bottom Half Integral
        0x2329 => 0x3c, // Left-Pointing Angle Bracket
        0x232a => 0x3e, // Right-Pointing Angle Bracket
        0x2500 => 0x2d, // Box Drawings Light Horizontal
        0x2502 => 0xa6, // Box Drawings Light Vertical
        0x250c => 0x2b, // Box Drawings Light Down And Right
        0x2510 => 0x2b, // Box Drawings Light Down And Left
        0x2514 => 0x2b, // Box Drawings Light Up And Right
        0x2518 => 0x2b, // Box Drawings Light Up And Left
        0x251c => 0x2b, // Box Drawings Light Vertical And Right
        0x2524 => 0xa6, // Box Drawings Light Vertical And Left
        0x252c => 0x2d, // Box Drawings Light Down And Horizontal
        0x2534 => 0x2d, // Box Drawings Light Up And Horizontal
        0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal
        0x2550 => 0x2d, // Box Drawings Double Horizontal
        0x2551 => 0xa6, // Box Drawings Double Vertical
        0x2552 => 0x2b, // Box Drawings Down Single And Right Double
        0x2553 => 0x2b, // Box Drawings Down Double And Right Single
        0x2554 => 0x2b, // Box Drawings Double Down And Right
        0x2555 => 0x2b, // Box Drawings Down Single And Left Double
        0x2556 => 0x2b, // Box Drawings Down Double And Left Single
        0x2557 => 0x2b, // Box Drawings Double Down And Left
        0x2558 => 0x2b, // Box Drawings Up Single And Right Double
        0x2559 => 0x2b, // Box Drawings Up Double And Right Single
        0x255a => 0x2b, // Box Drawings Double Up And Right
        0x255b => 0x2b, // Box Drawings Up Single And Left Double
        0x255c => 0x2b, // Box Drawings Up Double And Left Single
        0x255d => 0x2b, // Box Drawings Double Up And Left
        0x255e => 0xa6, // Box Drawings Vertical Single And Right Double
        0x255f => 0xa6, // Box Drawings Vertical Double And Right Single
        0x2560 => 0xa6, // Box Drawings Double Vertical And Right
        0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double
        0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single
        0x2563 => 0xa6, // Box Drawings Double Vertical And Left
        0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double
        0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single
        0x2566 => 0x2d, // Box Drawings Double Down And Horizontal
        0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double
        0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single
        0x2569 => 0x2d, // Box Drawings Double Up And Horizontal
        0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double
        0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single
        0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal
        0x2580 => 0xaf, // Upper Half Block
        0x2584 => 0x5f, // Lower Half Block
        0x2588 => 0xa6, // Full Block
        0x258c => 0xa6, // Left Half Block
        0x2590 => 0xa6, // Right Half Block
        0x2591 => 0xa6, // Light Shade
        0x2592 => 0xa6, // Medium Shade
        0x2593 => 0xa6, // Dark Shade
        0x25a0 => 0xa6, // Black Square
        0x263c => 0xa4, // White Sun With Rays
        0x2758 => 0x7c, // Light Vertical Bar
        0x3000 => 0x20, // Ideographic Space
        0x3008 => 0x3c, // Left Angle Bracket
        0x3009 => 0x3e, // Right Angle Bracket
        0x300a => 0xab, // Left Double Angle Bracket
        0x300b => 0xbb, // Right Double Angle Bracket
        0x301a => 0x5b, // Left White Square Bracket
        0x301b => 0x5d, // Right White Square Bracket
        0x30fb => 0xb7, // Katakana Middle Dot
        0xff01 => 0x21, // Fullwidth Exclamation Mark
        0xff02 => 0x22, // Fullwidth Quotation Mark
        0xff03 => 0x23, // Fullwidth Number Sign
        0xff04 => 0x24, // Fullwidth Dollar Sign
        0xff05 => 0x25, // Fullwidth Percent Sign
        0xff06 => 0x26, // Fullwidth Ampersand
        0xff07 => 0x27, // Fullwidth Apostrophe
        0xff08 => 0x28, // Fullwidth Left Parenthesis
        0xff09 => 0x29, // Fullwidth Right Parenthesis
        0xff0a => 0x2a, // Fullwidth Asterisk
        0xff0b => 0x2b, // Fullwidth Plus Sign
        0xff0c => 0x2c, // Fullwidth Comma
        0xff0d => 0x2d, // Fullwidth Hyphen-Minus
        0xff0e => 0x2e, // Fullwidth Full Stop
        0xff0f => 0x2f, // Fullwidth Solidus
        0xff10 => 0x30, // Fullwidth Digit Zero
        0xff11 => 0x31, // Fullwidth Digit One
        0xff12 => 0x32, // Fullwidth Digit Two
        0xff13 => 0x33, // Fullwidth Digit Three
        0xff14 => 0x34, // Fullwidth Digit Four
        0xff15 => 0x35, // Fullwidth Digit Five
        0xff16 => 0x36, // Fullwidth Digit Six
        0xff17 => 0x37, // Fullwidth Digit Seven
        0xff18 => 0x38, // Fullwidth Digit Eight
        0xff19 => 0x39, // Fullwidth Digit Nine
        0xff1a => 0x3a, // Fullwidth Colon
        0xff1b => 0x3b, // Fullwidth Semicolon
        0xff1c => 0x3c, // Fullwidth Less-Than Sign
        0xff1d => 0x3d, // Fullwidth Equals Sign
        0xff1e => 0x3e, // Fullwidth Greater-Than Sign
        0xff1f => 0x3f, // Fullwidth Question Mark
        0xff20 => 0x40, // Fullwidth Commercial At
        0xff21 => 0x41, // Fullwidth Latin Capital Letter A
        0xff22 => 0x42, // Fullwidth Latin Capital Letter B
        0xff23 => 0x43, // Fullwidth Latin Capital Letter C
        0xff24 => 0x44, // Fullwidth Latin Capital Letter D
        0xff25 => 0x45, // Fullwidth Latin Capital Letter E
        0xff26 => 0x46, // Fullwidth Latin Capital Letter F
        0xff27 => 0x47, // Fullwidth Latin Capital Letter G
        0xff28 => 0x48, // Fullwidth Latin Capital Letter H
        0xff29 => 0x49, // Fullwidth Latin Capital Letter I
        0xff2a => 0x4a, // Fullwidth Latin Capital Letter J
        0xff2b => 0x4b, // Fullwidth Latin Capital Letter K
        0xff2c => 0x4c, // Fullwidth Latin Capital Letter L
        0xff2d => 0x4d, // Fullwidth Latin Capital Letter M
        0xff2e => 0x4e, // Fullwidth Latin Capital Letter N
        0xff2f => 0x4f, // Fullwidth Latin Capital Letter O
        0xff30 => 0x50, // Fullwidth Latin Capital Letter P
        0xff31 => 0x51, // Fullwidth Latin Capital Letter Q
        0xff32 => 0x52, // Fullwidth Latin Capital Letter R
        0xff33 => 0x53, // Fullwidth Latin Capital Letter S
        0xff34 => 0x54, // Fullwidth Latin Capital Letter T
        0xff35 => 0x55, // Fullwidth Latin Capital Letter U
        0xff36 => 0x56, // Fullwidth Latin Capital Letter V
        0xff37 => 0x57, // Fullwidth Latin Capital Letter W
        0xff38 => 0x58, // Fullwidth Latin Capital Letter X
        0xff39 => 0x59, // Fullwidth Latin Capital Letter Y
        0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z
        0xff3b => 0x5b, // Fullwidth Left Square Bracket
        0xff3c => 0x5c, // Fullwidth Reverse Solidus
        0xff3d => 0x5d, // Fullwidth Right Square Bracket
        0xff3e => 0x5e, // Fullwidth Circumflex Accent
        0xff3f => 0x5f, // Fullwidth Low Line
        0xff40 => 0x60, // Fullwidth Grave Accent
        0xff41 => 0x61, // Fullwidth Latin Small Letter A
        0xff42 => 0x62, // Fullwidth Latin Small Letter B
        0xff43 => 0x63, // Fullwidth Latin Small Letter C
        0xff44 => 0x64, // Fullwidth Latin Small Letter D
        0xff45 => 0x65, // Fullwidth Latin Small Letter E
        0xff46 => 0x66, // Fullwidth Latin Small Letter F
        0xff47 => 0x67, // Fullwidth Latin Small Letter G
        0xff48 => 0x68, // Fullwidth Latin Small Letter H
        0xff49 => 0x69, // Fullwidth Latin Small Letter I
        0xff4a => 0x6a, // Fullwidth Latin Small Letter J
        0xff4b => 0x6b, // Fullwidth Latin Small Letter K
        0xff4c => 0x6c, // Fullwidth Latin Small Letter L
        0xff4d => 0x6d, // Fullwidth Latin Small Letter M
        0xff4e => 0x6e, // Fullwidth Latin Small Letter N
        0xff4f => 0x6f, // Fullwidth Latin Small Letter O
        0xff50 => 0x70, // Fullwidth Latin Small Letter P
        0xff51 => 0x71, // Fullwidth Latin Small Letter Q
        0xff52 => 0x72, // Fullwidth Latin Small Letter R
        0xff53 => 0x73, // Fullwidth Latin Small Letter S
        0xff54 => 0x74, // Fullwidth Latin Small Letter T
        0xff55 => 0x75, // Fullwidth Latin Small Letter U
        0xff56 => 0x76, // Fullwidth Latin Small Letter V
        0xff57 => 0x77, // Fullwidth Latin Small Letter W
        0xff58 => 0x78, // Fullwidth Latin Small Letter X
        0xff59 => 0x79, // Fullwidth Latin Small Letter Y
        0xff5a => 0x7a, // Fullwidth Latin Small Letter Z
        0xff5b => 0x7b, // Fullwidth Left Curly Bracket
        0xff5c => 0x7c, // Fullwidth Vertical Line
        0xff5d => 0x7d, // Fullwidth Right Curly Bracket
        0xff5e => 0x7e, // Fullwidth Tilde
        // Not in the best fit mapping, but RC uses these mappings too
        0x2007 => 0xA0, // Figure Space
        0x2008 => ' ', // Punctuation Space
        0x2009 => ' ', // Thin Space
        0x200A => ' ', // Hair Space
        0x2012 => '-', // Figure Dash
        0x2015 => '-', // Horizontal Bar
        0x201B => '\'', // Single High-reversed-9 Quotation Mark
        0x201F => '"', // Double High-reversed-9 Quotation Mark
        0x202F => 0xA0, // Narrow No-Break Space
        0x2033 => '"', // Double Prime
        0x2036 => '"', // Reversed Double Prime
        // No best-fit mapping exists for this codepoint.
        else => null,
    };
}
|
|
|
|
// Round-trips a sample of Windows-1252 bytes (an unmapped 0x81, some ASCII,
// the remapped 0x80...0x9F bytes, and most of 0xA1...0xFF) through
// `windows1252ToUtf8Stream` and checks both the output and the returned count.
test "windows-1252 to utf8" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();

    const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
    // The characters for input bytes 0xD7 and 0xD8 (U+00D7, U+00D8) are written
    // as escapes so they cannot be corrupted by a text re-encoding of this file.
    const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ\u{d7}\u{d8}ÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";

    var fbs = std.io.fixedBufferStream(input_windows1252);
    const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader());

    try std.testing.expectEqualStrings(expected_utf8, buf.items);
    try std.testing.expectEqual(expected_utf8.len, bytes_written);
}
|