mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 21:08:36 +00:00
Merge pull request #17069 from squeek502/resinator
Add a `.rc` -> `.res` compiler to the Zig compiler
This commit is contained in:
commit
3fc7413574
@ -90,6 +90,14 @@ is_linking_libc: bool,
|
||||
is_linking_libcpp: bool,
|
||||
vcpkg_bin_path: ?[]const u8 = null,
|
||||
|
||||
// keep in sync with src/Compilation.zig:RcIncludes
|
||||
/// Behavior of automatic detection of include directories when compiling .rc files.
|
||||
/// any: Use MSVC if available, fall back to MinGW.
|
||||
/// msvc: Use MSVC include paths (must be present on the system).
|
||||
/// gnu: Use MinGW include paths (distributed with Zig).
|
||||
/// none: Do not use any autodetected include paths.
|
||||
rc_includes: enum { any, msvc, gnu, none } = .any,
|
||||
|
||||
installed_path: ?[]const u8,
|
||||
|
||||
/// Base address for an executable image.
|
||||
@ -221,6 +229,26 @@ pub const CSourceFile = struct {
|
||||
}
|
||||
};
|
||||
|
||||
/// A Windows resource script (`.rc`) and the flags it should be compiled with.
pub const RcSourceFile = struct {
    /// Location of the resource script.
    file: LazyPath,
    /// Any option that rc.exe accepts will work here, with the exception of:
    /// - `/fo`: The output filename is set by the build system
    /// - Any MUI-related option
    /// https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line-
    ///
    /// Implicitly defined options:
    ///  /x (ignore the INCLUDE environment variable)
    ///  /D_DEBUG or /DNDEBUG depending on the optimization mode
    flags: []const []const u8 = &.{},

    /// Returns a copy of `self` whose `file` and `flags` have been duplicated
    /// through the build instance `b`.
    pub fn dupe(self: RcSourceFile, b: *std.Build) RcSourceFile {
        const file_copy = self.file.dupe(b);
        const flags_copy = b.dupeStrings(self.flags);
        return .{
            .file = file_copy,
            .flags = flags_copy,
        };
    }
};
|
||||
|
||||
pub const LinkObject = union(enum) {
|
||||
static_path: LazyPath,
|
||||
other_step: *Compile,
|
||||
@ -228,6 +256,7 @@ pub const LinkObject = union(enum) {
|
||||
assembly_file: LazyPath,
|
||||
c_source_file: *CSourceFile,
|
||||
c_source_files: *CSourceFiles,
|
||||
win32_resource_file: *RcSourceFile,
|
||||
};
|
||||
|
||||
pub const SystemLib = struct {
|
||||
@ -910,6 +939,18 @@ pub fn addCSourceFile(self: *Compile, source: CSourceFile) void {
|
||||
source.file.addStepDependencies(&self.step);
|
||||
}
|
||||
|
||||
/// Registers a `.rc` file to be compiled and linked into this step's output.
/// The call is a no-op unless the target object format is COFF.
pub fn addWin32ResourceFile(self: *Compile, source: RcSourceFile) void {
    // Only the PE/COFF format has a Resource Table, so for any other target
    // the resource file is just ignored.
    switch (self.target.getObjectFormat()) {
        .coff => {},
        else => return,
    }

    const owner = self.step.owner;
    // Store a build-owned copy so the caller's `source` does not need to outlive this call.
    const duped = owner.allocator.create(RcSourceFile) catch @panic("OOM");
    duped.* = source.dupe(owner);
    self.link_objects.append(.{ .win32_resource_file = duped }) catch @panic("OOM");
    source.file.addStepDependencies(&self.step);
}
|
||||
|
||||
/// Sets this step's `verbose_link` field to `value`.
pub fn setVerboseLink(self: *Compile, value: bool) void {
|
||||
self.verbose_link = value;
|
||||
}
|
||||
@ -1358,6 +1399,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
|
||||
try transitive_deps.add(self.link_objects.items);
|
||||
|
||||
var prev_has_cflags = false;
|
||||
var prev_has_rcflags = false;
|
||||
var prev_search_strategy: SystemLib.SearchStrategy = .paths_first;
|
||||
var prev_preferred_link_mode: std.builtin.LinkMode = .Dynamic;
|
||||
|
||||
@ -1500,6 +1542,24 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
|
||||
try zig_args.append(b.pathFromRoot(file));
|
||||
}
|
||||
},
|
||||
|
||||
.win32_resource_file => |rc_source_file| {
|
||||
if (rc_source_file.flags.len == 0) {
|
||||
if (prev_has_rcflags) {
|
||||
try zig_args.append("-rcflags");
|
||||
try zig_args.append("--");
|
||||
prev_has_rcflags = false;
|
||||
}
|
||||
} else {
|
||||
try zig_args.append("-rcflags");
|
||||
for (rc_source_file.flags) |arg| {
|
||||
try zig_args.append(arg);
|
||||
}
|
||||
try zig_args.append("--");
|
||||
prev_has_rcflags = true;
|
||||
}
|
||||
try zig_args.append(rc_source_file.file.getPath(b));
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@ -1897,6 +1957,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
|
||||
}
|
||||
}
|
||||
|
||||
if (self.rc_includes != .any) {
|
||||
try zig_args.append("-rcincludes");
|
||||
try zig_args.append(@tagName(self.rc_includes));
|
||||
}
|
||||
|
||||
try addFlag(&zig_args, "valgrind", self.valgrind_support);
|
||||
try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath);
|
||||
|
||||
|
||||
@ -421,7 +421,7 @@ pub const Wip = struct {
|
||||
_ = try addExtra(wip, rt);
|
||||
}
|
||||
|
||||
pub fn addBundle(wip: *Wip, other: ErrorBundle) !void {
|
||||
pub fn addBundleAsNotes(wip: *Wip, other: ErrorBundle) !void {
|
||||
const gpa = wip.gpa;
|
||||
|
||||
try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len);
|
||||
@ -436,6 +436,21 @@ pub const Wip = struct {
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies every message returned by `other.getMessages()` into `wip` and
/// appends each one to `wip.root_list`.
pub fn addBundleAsRoots(wip: *Wip, other: ErrorBundle) !void {
    const allocator = wip.gpa;

    // Reserve room up front for everything `other` can contribute.
    try wip.string_bytes.ensureUnusedCapacity(allocator, other.string_bytes.len);
    try wip.extra.ensureUnusedCapacity(allocator, other.extra.len);

    const incoming = other.getMessages();
    try wip.root_list.ensureUnusedCapacity(allocator, incoming.len);

    for (incoming) |incoming_msg| {
        // The ensureUnusedCapacity calls above guarantee this cannot fail.
        const copied = wip.addOtherMessage(other, incoming_msg) catch unreachable;
        wip.root_list.appendAssumeCapacity(copied);
    }
}
|
||||
|
||||
pub fn reserveNotes(wip: *Wip, notes_len: u32) !u32 {
|
||||
try wip.extra.ensureUnusedCapacity(wip.gpa, notes_len +
|
||||
notes_len * @typeInfo(ErrorBundle.ErrorMessage).Struct.fields.len);
|
||||
@ -474,7 +489,10 @@ pub const Wip = struct {
|
||||
.span_start = other_sl.span_start,
|
||||
.span_main = other_sl.span_main,
|
||||
.span_end = other_sl.span_end,
|
||||
.source_line = try wip.addString(other.nullTerminatedString(other_sl.source_line)),
|
||||
.source_line = if (other_sl.source_line != 0)
|
||||
try wip.addString(other.nullTerminatedString(other_sl.source_line))
|
||||
else
|
||||
0,
|
||||
.reference_trace_len = other_sl.reference_trace_len,
|
||||
});
|
||||
|
||||
|
||||
@ -39,6 +39,7 @@ const libtsan = @import("libtsan.zig");
|
||||
const Zir = @import("Zir.zig");
|
||||
const Autodoc = @import("Autodoc.zig");
|
||||
const Color = @import("main.zig").Color;
|
||||
const resinator = @import("resinator.zig");
|
||||
|
||||
/// General-purpose allocator. Used for both temporary and long-term storage.
|
||||
gpa: Allocator,
|
||||
@ -46,6 +47,7 @@ gpa: Allocator,
|
||||
arena_state: std.heap.ArenaAllocator.State,
|
||||
bin_file: *link.File,
|
||||
c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{},
|
||||
win32_resource_table: std.AutoArrayHashMapUnmanaged(*Win32Resource, void) = .{},
|
||||
/// This is a pointer to a local variable inside `update()`.
|
||||
whole_cache_manifest: ?*Cache.Manifest = null,
|
||||
whole_cache_manifest_mutex: std.Thread.Mutex = .{},
|
||||
@ -60,6 +62,10 @@ anon_work_queue: std.fifo.LinearFifo(Job, .Dynamic),
|
||||
/// gets linked with the Compilation.
|
||||
c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
|
||||
|
||||
/// These jobs are to invoke the RC compiler to create a compiled resource file (.res), which
|
||||
/// gets linked with the Compilation.
|
||||
win32_resource_work_queue: std.fifo.LinearFifo(*Win32Resource, .Dynamic),
|
||||
|
||||
/// These jobs are to tokenize, parse, and astgen files, which may be outdated
|
||||
/// since the last compilation, as well as scan for `@import` and queue up
|
||||
/// additional jobs corresponding to those new files.
|
||||
@ -73,6 +79,10 @@ embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic),
|
||||
/// This data is accessed by multiple threads and is protected by `mutex`.
|
||||
failed_c_objects: std.AutoArrayHashMapUnmanaged(*CObject, *CObject.ErrorMsg) = .{},
|
||||
|
||||
/// The ErrorBundle memory is owned by the `Win32Resource`, using Compilation's general purpose allocator.
|
||||
/// This data is accessed by multiple threads and is protected by `mutex`.
|
||||
failed_win32_resources: std.AutoArrayHashMapUnmanaged(*Win32Resource, ErrorBundle) = .{},
|
||||
|
||||
/// Miscellaneous things that can fail.
|
||||
misc_failures: std.AutoArrayHashMapUnmanaged(MiscTask, MiscError) = .{},
|
||||
|
||||
@ -109,6 +119,7 @@ last_update_was_cache_hit: bool = false,
|
||||
|
||||
c_source_files: []const CSourceFile,
|
||||
clang_argv: []const []const u8,
|
||||
rc_source_files: []const RcSourceFile,
|
||||
cache_parent: *Cache,
|
||||
/// Path to own executable for invoking `zig clang`.
|
||||
self_exe_path: ?[]const u8,
|
||||
@ -125,6 +136,7 @@ local_cache_directory: Directory,
|
||||
global_cache_directory: Directory,
|
||||
libc_include_dir_list: []const []const u8,
|
||||
libc_framework_dir_list: []const []const u8,
|
||||
rc_include_dir_list: []const []const u8,
|
||||
thread_pool: *ThreadPool,
|
||||
|
||||
/// Populated when we build the libc++ static library. A Job to build this is placed in the queue
|
||||
@ -225,6 +237,23 @@ pub const CSourceFile = struct {
|
||||
ext: ?FileExt = null,
|
||||
};
|
||||
|
||||
/// For passing to resinator.
|
||||
pub const RcSourceFile = struct {
|
||||
/// Path of the `.rc` source file.
src_path: []const u8,
|
||||
/// Additional flags supplied for this file; empty by default.
extra_flags: []const []const u8 = &.{},
|
||||
};
|
||||
|
||||
/// Selects how include directories are auto-detected when compiling `.rc` files.
pub const RcIncludes = enum {
|
||||
/// Use MSVC if available, fall back to MinGW.
|
||||
any,
|
||||
/// Use MSVC include paths (MSVC install + Windows SDK, must be present on the system).
|
||||
msvc,
|
||||
/// Use MinGW include paths (distributed with Zig).
|
||||
gnu,
|
||||
/// Do not use any autodetected include paths.
|
||||
none,
|
||||
};
|
||||
|
||||
const Job = union(enum) {
|
||||
/// Write the constant value for a Decl to the output file.
|
||||
codegen_decl: Module.Decl.Index,
|
||||
@ -326,6 +355,50 @@ pub const CObject = struct {
|
||||
}
|
||||
};
|
||||
|
||||
/// Tracks one Win32 resource script and the outcome of its most recent compilation.
pub const Win32Resource = struct {
    /// Relative to cwd. Owned by arena.
    src: RcSourceFile,
    status: union(enum) {
        new,
        success: struct {
            /// The outputted result. Owned by gpa.
            res_path: []u8,
            /// This is a file system lock on the cache hash manifest representing this
            /// object. It prevents other invocations of the Zig compiler from interfering
            /// with this object until released.
            lock: Cache.Lock,
        },
        /// There will be a corresponding entry in Compilation.failed_win32_resources.
        failure,
        /// A transient failure happened when trying to compile the resource file; it may
        /// succeed if we try again. There may be a corresponding entry in
        /// Compilation.failed_win32_resources. If there is not, the failure is out of memory.
        failure_retryable,
    },

    /// Resets `status` to `.new`, freeing `res_path` and releasing `lock` when the
    /// previous status was `.success`.
    /// Returns true if there was failure.
    pub fn clearStatus(self: *Win32Resource, gpa: Allocator) bool {
        const had_failure = switch (self.status) {
            // Already in the initial state; nothing to reset.
            .new => return false,
            .failure, .failure_retryable => true,
            .success => |*success| released: {
                gpa.free(success.res_path);
                success.lock.release();
                break :released false;
            },
        };
        self.status = .new;
        return had_failure;
    }

    /// Clears the status and then frees `self` with `gpa`.
    pub fn destroy(self: *Win32Resource, gpa: Allocator) void {
        _ = self.clearStatus(gpa);
        gpa.destroy(self);
    }
};
|
||||
|
||||
pub const MiscTask = enum {
|
||||
write_builtin_zig,
|
||||
glibc_crt_file,
|
||||
@ -505,6 +578,8 @@ pub const InitOptions = struct {
|
||||
rpath_list: []const []const u8 = &[0][]const u8{},
|
||||
symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{},
|
||||
c_source_files: []const CSourceFile = &[0]CSourceFile{},
|
||||
rc_source_files: []const RcSourceFile = &[0]RcSourceFile{},
|
||||
rc_includes: RcIncludes = .any,
|
||||
link_objects: []LinkObject = &[0]LinkObject{},
|
||||
framework_dirs: []const []const u8 = &[0][]const u8{},
|
||||
frameworks: []const Framework = &.{},
|
||||
@ -938,6 +1013,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
|
||||
options.libc_installation,
|
||||
);
|
||||
|
||||
const rc_dirs = try detectWin32ResourceIncludeDirs(
|
||||
arena,
|
||||
options,
|
||||
);
|
||||
|
||||
const sysroot = options.sysroot orelse libc_dirs.sysroot;
|
||||
|
||||
const must_pie = target_util.requiresPIE(options.target);
|
||||
@ -1591,16 +1671,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
|
||||
.work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
|
||||
.anon_work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
|
||||
.c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
|
||||
.win32_resource_work_queue = std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa),
|
||||
.astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa),
|
||||
.embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa),
|
||||
.keep_source_files_loaded = options.keep_source_files_loaded,
|
||||
.use_clang = use_clang,
|
||||
.clang_argv = options.clang_argv,
|
||||
.c_source_files = options.c_source_files,
|
||||
.rc_source_files = options.rc_source_files,
|
||||
.cache_parent = cache,
|
||||
.self_exe_path = options.self_exe_path,
|
||||
.libc_include_dir_list = libc_dirs.libc_include_dir_list,
|
||||
.libc_framework_dir_list = libc_dirs.libc_framework_dir_list,
|
||||
.rc_include_dir_list = rc_dirs.libc_include_dir_list,
|
||||
.sanitize_c = sanitize_c,
|
||||
.thread_pool = options.thread_pool,
|
||||
.clang_passthrough_mode = options.clang_passthrough_mode,
|
||||
@ -1647,6 +1730,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
|
||||
comp.c_object_table.putAssumeCapacityNoClobber(c_object, {});
|
||||
}
|
||||
|
||||
// Add a `Win32Resource` for each `rc_source_files`.
|
||||
try comp.win32_resource_table.ensureTotalCapacity(gpa, options.rc_source_files.len);
|
||||
for (options.rc_source_files) |rc_source_file| {
|
||||
const win32_resource = try gpa.create(Win32Resource);
|
||||
errdefer gpa.destroy(win32_resource);
|
||||
|
||||
win32_resource.* = .{
|
||||
.status = .{ .new = {} },
|
||||
.src = rc_source_file,
|
||||
};
|
||||
comp.win32_resource_table.putAssumeCapacityNoClobber(win32_resource, {});
|
||||
}
|
||||
|
||||
const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null;
|
||||
|
||||
if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies and target.ofmt != .c) {
|
||||
@ -1804,6 +1900,7 @@ pub fn destroy(self: *Compilation) void {
|
||||
self.work_queue.deinit();
|
||||
self.anon_work_queue.deinit();
|
||||
self.c_object_work_queue.deinit();
|
||||
self.win32_resource_work_queue.deinit();
|
||||
self.astgen_work_queue.deinit();
|
||||
self.embed_file_work_queue.deinit();
|
||||
|
||||
@ -1852,6 +1949,16 @@ pub fn destroy(self: *Compilation) void {
|
||||
}
|
||||
self.failed_c_objects.deinit(gpa);
|
||||
|
||||
for (self.win32_resource_table.keys()) |key| {
|
||||
key.destroy(gpa);
|
||||
}
|
||||
self.win32_resource_table.deinit(gpa);
|
||||
|
||||
for (self.failed_win32_resources.values()) |*value| {
|
||||
value.deinit(gpa);
|
||||
}
|
||||
self.failed_win32_resources.deinit(gpa);
|
||||
|
||||
for (self.lld_errors.items) |*lld_error| {
|
||||
lld_error.deinit(gpa);
|
||||
}
|
||||
@ -2014,6 +2121,13 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void
|
||||
comp.c_object_work_queue.writeItemAssumeCapacity(key);
|
||||
}
|
||||
|
||||
// For compiling Win32 resources, we rely on the cache hash system to avoid duplicating work.
|
||||
// Add a Job for each Win32 resource file.
|
||||
try comp.win32_resource_work_queue.ensureUnusedCapacity(comp.win32_resource_table.count());
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
comp.win32_resource_work_queue.writeItemAssumeCapacity(key);
|
||||
}
|
||||
|
||||
if (comp.bin_file.options.module) |module| {
|
||||
module.compile_log_text.shrinkAndFree(module.gpa, 0);
|
||||
module.generation += 1;
|
||||
@ -2336,6 +2450,13 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes
|
||||
man.hash.addListOfBytes(key.src.extra_flags);
|
||||
}
|
||||
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
_ = try man.addFile(key.src.src_path, null);
|
||||
man.hash.addListOfBytes(key.src.extra_flags);
|
||||
}
|
||||
|
||||
man.hash.addListOfBytes(comp.rc_include_dir_list);
|
||||
|
||||
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_asm);
|
||||
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_ir);
|
||||
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_bc);
|
||||
@ -2571,8 +2692,14 @@ pub fn makeBinFileWritable(self: *Compilation) !void {
|
||||
|
||||
/// This function is temporally single-threaded.
|
||||
pub fn totalErrorCount(self: *Compilation) u32 {
|
||||
var total: usize = self.failed_c_objects.count() + self.misc_failures.count() +
|
||||
@intFromBool(self.alloc_failure_occurred) + self.lld_errors.items.len;
|
||||
var total: usize = self.failed_c_objects.count() +
|
||||
self.misc_failures.count() +
|
||||
@intFromBool(self.alloc_failure_occurred) +
|
||||
self.lld_errors.items.len;
|
||||
|
||||
for (self.failed_win32_resources.values()) |errs| {
|
||||
total += errs.errorMessageCount();
|
||||
}
|
||||
|
||||
if (self.bin_file.options.module) |module| {
|
||||
total += module.failed_exports.count();
|
||||
@ -2664,6 +2791,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle {
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
var it = self.failed_win32_resources.iterator();
|
||||
while (it.next()) |entry| {
|
||||
try bundle.addBundleAsRoots(entry.value_ptr.*);
|
||||
}
|
||||
}
|
||||
|
||||
for (self.lld_errors.items) |lld_error| {
|
||||
const notes_len = @as(u32, @intCast(lld_error.context_lines.len));
|
||||
|
||||
@ -2683,7 +2817,7 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle {
|
||||
.msg = try bundle.addString(value.msg),
|
||||
.notes_len = if (value.children) |b| b.errorMessageCount() else 0,
|
||||
});
|
||||
if (value.children) |b| try bundle.addBundle(b);
|
||||
if (value.children) |b| try bundle.addBundleAsNotes(b);
|
||||
}
|
||||
if (self.alloc_failure_occurred) {
|
||||
try bundle.addRootErrorMessage(.{
|
||||
@ -3082,6 +3216,9 @@ pub fn performAllTheWork(
|
||||
var c_obj_prog_node = main_progress_node.start("Compile C Objects", comp.c_source_files.len);
|
||||
defer c_obj_prog_node.end();
|
||||
|
||||
var win32_resource_prog_node = main_progress_node.start("Compile Win32 Resources", comp.rc_source_files.len);
|
||||
defer win32_resource_prog_node.end();
|
||||
|
||||
var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", comp.embed_file_work_queue.count);
|
||||
defer embed_file_prog_node.end();
|
||||
|
||||
@ -3130,6 +3267,13 @@ pub fn performAllTheWork(
|
||||
comp, c_object, &c_obj_prog_node, &comp.work_queue_wait_group,
|
||||
});
|
||||
}
|
||||
|
||||
while (comp.win32_resource_work_queue.readItem()) |win32_resource| {
|
||||
comp.work_queue_wait_group.start();
|
||||
try comp.thread_pool.spawn(workerUpdateWin32Resource, .{
|
||||
comp, win32_resource, &win32_resource_prog_node, &comp.work_queue_wait_group,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (comp.bin_file.options.module) |mod| {
|
||||
@ -3659,6 +3803,14 @@ pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest {
|
||||
return man;
|
||||
}
|
||||
|
||||
/// Obtains a manifest from `comp.cache_parent` and adds `comp.rc_include_dir_list`
/// to its hash before returning it.
pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest {
    var manifest = comp.cache_parent.obtain();
    manifest.hash.addListOfBytes(comp.rc_include_dir_list);
    return manifest;
}
|
||||
|
||||
// References `cImport` from a test block; presumably so the declaration is
// analyzed when tests are built (TODO confirm).
test "cImport" {
|
||||
_ = cImport;
|
||||
}
|
||||
@ -3832,6 +3984,26 @@ fn workerUpdateCObject(
|
||||
};
|
||||
}
|
||||
|
||||
/// Thread pool entry point: runs `updateWin32Resource` for `win32_resource` and
/// calls `wg.finish()` on exit. Errors other than `error.AnalysisFail` are passed
/// to `reportRetryableWin32ResourceError`.
fn workerUpdateWin32Resource(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    progress_node: *std.Progress.Node,
    wg: *WaitGroup,
) void {
    defer wg.finish();

    comp.updateWin32Resource(win32_resource, progress_node) catch |update_err| {
        if (update_err == error.AnalysisFail) return;

        comp.reportRetryableWin32ResourceError(win32_resource, update_err) catch |report_err| switch (report_err) {
            // Swallowing this error is OK because it's implied to be OOM when
            // there is a missing failed_win32_resources error message.
            error.OutOfMemory => {},
        };
    };
}
|
||||
|
||||
fn buildCompilerRtOneShot(
|
||||
comp: *Compilation,
|
||||
output_mode: std.builtin.OutputMode,
|
||||
@ -3877,6 +4049,18 @@ fn reportRetryableCObjectError(
|
||||
}
|
||||
}
|
||||
|
||||
/// Marks `win32_resource` as `.failure_retryable` and records `err` in
/// `comp.failed_win32_resources` so that the failure shows up in the
/// compilation's reported errors instead of being dropped.
///
/// The stored `ErrorBundle` holds a single root message containing the error
/// name, attributed to the resource's source path. It is allocated with
/// `comp.gpa`.
///
/// Fails only with `error.OutOfMemory`. In that case no entry is stored.
fn reportRetryableWin32ResourceError(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    err: anyerror,
) error{OutOfMemory}!void {
    win32_resource.status = .failure_retryable;

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    defer bundle.deinit();

    try bundle.addRootErrorMessage(.{
        .msg = try bundle.printString("{s}", .{@errorName(err)}),
        // No precise location is known for this kind of failure, so point at
        // the start of the resource script.
        .src_loc = try bundle.addSourceLocation(.{
            .src_path = try bundle.addString(win32_resource.src.src_path),
            .line = 0,
            .column = 0,
            .span_start = 0,
            .span_main = 0,
            .span_end = 0,
        }),
    });

    var finished_bundle = try bundle.toOwnedBundle("");
    // If inserting into the map fails, the bundle would otherwise leak.
    errdefer finished_bundle.deinit(comp.gpa);

    // `failed_win32_resources` is shared between worker threads.
    comp.mutex.lock();
    defer comp.mutex.unlock();
    try comp.failed_win32_resources.putNoClobber(comp.gpa, win32_resource, finished_bundle);
}
|
||||
|
||||
fn reportRetryableAstGenError(
|
||||
comp: *Compilation,
|
||||
src: AstGenSrc,
|
||||
@ -4233,6 +4417,311 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P
|
||||
};
|
||||
}
|
||||
|
||||
fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void {
|
||||
if (!build_options.have_llvm) {
|
||||
return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{});
|
||||
}
|
||||
const self_exe_path = comp.self_exe_path orelse
|
||||
return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{});
|
||||
|
||||
const tracy_trace = trace(@src());
|
||||
defer tracy_trace.end();
|
||||
|
||||
log.debug("updating win32 resource: {s}", .{win32_resource.src.src_path});
|
||||
|
||||
if (win32_resource.clearStatus(comp.gpa)) {
|
||||
// There was previous failure.
|
||||
comp.mutex.lock();
|
||||
defer comp.mutex.unlock();
|
||||
// If the failure was OOM, there will not be an entry here, so we do
|
||||
// not assert discard.
|
||||
_ = comp.failed_win32_resources.swapRemove(win32_resource);
|
||||
}
|
||||
|
||||
var man = comp.obtainWin32ResourceCacheManifest();
|
||||
defer man.deinit();
|
||||
|
||||
_ = try man.addFile(win32_resource.src.src_path, null);
|
||||
man.hash.addListOfBytes(win32_resource.src.extra_flags);
|
||||
|
||||
var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa);
|
||||
defer arena_allocator.deinit();
|
||||
const arena = arena_allocator.allocator();
|
||||
|
||||
const rc_basename = std.fs.path.basename(win32_resource.src.src_path);
|
||||
|
||||
win32_resource_prog_node.activate();
|
||||
var child_progress_node = win32_resource_prog_node.start(rc_basename, 0);
|
||||
child_progress_node.activate();
|
||||
defer child_progress_node.end();
|
||||
|
||||
const rc_basename_noext = rc_basename[0 .. rc_basename.len - std.fs.path.extension(rc_basename).len];
|
||||
|
||||
const digest = if (try man.hit()) man.final() else blk: {
|
||||
const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext});
|
||||
|
||||
const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename);
|
||||
var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{});
|
||||
defer zig_cache_tmp_dir.close();
|
||||
|
||||
const res_filename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext});
|
||||
|
||||
// We can't know the digest until we do the compilation,
|
||||
// so we need a temporary filename.
|
||||
const out_res_path = try comp.tmpFilePath(arena, res_filename);
|
||||
|
||||
var options = options: {
|
||||
var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, win32_resource.src.extra_flags.len + 4);
|
||||
defer resinator_args.deinit(comp.gpa);
|
||||
|
||||
resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg
|
||||
resinator_args.appendSliceAssumeCapacity(win32_resource.src.extra_flags);
|
||||
resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path });
|
||||
|
||||
var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa);
|
||||
defer cli_diagnostics.deinit();
|
||||
var options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) {
|
||||
error.ParseError => {
|
||||
return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics);
|
||||
},
|
||||
else => |e| return e,
|
||||
};
|
||||
break :options options;
|
||||
};
|
||||
defer options.deinit();
|
||||
|
||||
var argv = std.ArrayList([]const u8).init(comp.gpa);
|
||||
defer argv.deinit();
|
||||
var temp_strings = std.ArrayList([]const u8).init(comp.gpa);
|
||||
defer {
|
||||
for (temp_strings.items) |temp_string| {
|
||||
comp.gpa.free(temp_string);
|
||||
}
|
||||
temp_strings.deinit();
|
||||
}
|
||||
|
||||
// TODO: support options.preprocess == .no and .only
|
||||
// alternatively, error if those options are used
|
||||
try argv.appendSlice(&[_][]const u8{
|
||||
self_exe_path,
|
||||
"clang",
|
||||
"-E", // preprocessor only
|
||||
"--comments",
|
||||
"-fuse-line-directives", // #line <num> instead of # <num>
|
||||
"-xc", // output c
|
||||
"-Werror=null-character", // error on null characters instead of converting them to spaces
|
||||
"-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things
|
||||
"-DRC_INVOKED", // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros
|
||||
});
|
||||
// Using -fms-compatibility and targeting the gnu abi interact in a strange way:
|
||||
// - Targeting the GNU abi stops _MSC_VER from being defined
|
||||
// - Passing -fms-compatibility stops __GNUC__ from being defined
|
||||
// Neither being defined is a problem for things like MinGW's
|
||||
// vadefs.h, which will fail during preprocessing if neither are defined.
|
||||
// So, when targeting the GNU abi, we need to force __GNUC__ to be defined.
|
||||
//
|
||||
// TODO: This is a workaround that should be removed if possible.
|
||||
if (comp.getTarget().isGnu()) {
|
||||
// This is the same default gnuc version that Clang uses:
|
||||
// https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738
|
||||
try argv.append("-fgnuc-version=4.2.1");
|
||||
}
|
||||
for (options.extra_include_paths.items) |extra_include_path| {
|
||||
try argv.append("--include-directory");
|
||||
try argv.append(extra_include_path);
|
||||
}
|
||||
var symbol_it = options.symbols.iterator();
|
||||
while (symbol_it.next()) |entry| {
|
||||
switch (entry.value_ptr.*) {
|
||||
.define => |value| {
|
||||
try argv.append("-D");
|
||||
const define_arg = arg: {
|
||||
const arg = try std.fmt.allocPrint(comp.gpa, "{s}={s}", .{ entry.key_ptr.*, value });
|
||||
errdefer comp.gpa.free(arg);
|
||||
try temp_strings.append(arg);
|
||||
break :arg arg;
|
||||
};
|
||||
try argv.append(define_arg);
|
||||
},
|
||||
.undefine => {
|
||||
try argv.append("-U");
|
||||
try argv.append(entry.key_ptr.*);
|
||||
},
|
||||
}
|
||||
}
|
||||
try argv.append(win32_resource.src.src_path);
|
||||
try argv.appendSlice(&[_][]const u8{
|
||||
"-o",
|
||||
out_rcpp_path,
|
||||
});
|
||||
|
||||
const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path});
|
||||
// Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization
|
||||
// mode. While these defines are not normally present when calling rc.exe directly,
|
||||
// them being defined matches the behavior of how MSVC calls rc.exe which is the more
|
||||
// relevant behavior in this case.
|
||||
try comp.addCCArgs(arena, &argv, .rc, out_dep_path);
|
||||
|
||||
if (comp.verbose_cc) {
|
||||
dump_argv(argv.items);
|
||||
}
|
||||
|
||||
if (std.process.can_spawn) {
|
||||
var child = std.ChildProcess.init(argv.items, arena);
|
||||
child.stdin_behavior = .Ignore;
|
||||
child.stdout_behavior = .Ignore;
|
||||
child.stderr_behavior = .Pipe;
|
||||
|
||||
try child.spawn();
|
||||
|
||||
const stderr_reader = child.stderr.?.reader();
|
||||
|
||||
const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024);
|
||||
|
||||
const term = child.wait() catch |err| {
|
||||
return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
|
||||
};
|
||||
|
||||
switch (term) {
|
||||
.Exited => |code| {
|
||||
if (code != 0) {
|
||||
// TODO parse clang stderr and turn it into an error message
|
||||
// and then call failCObjWithOwnedErrorMsg
|
||||
log.err("clang preprocessor failed with stderr:\n{s}", .{stderr});
|
||||
return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code});
|
||||
}
|
||||
},
|
||||
else => {
|
||||
log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr});
|
||||
return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{});
|
||||
},
|
||||
}
|
||||
} else {
|
||||
const exit_code = try clangMain(arena, argv.items);
|
||||
if (exit_code != 0) {
|
||||
return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code});
|
||||
}
|
||||
}
|
||||
|
||||
const dep_basename = std.fs.path.basename(out_dep_path);
|
||||
// Add the files depended on to the cache system.
|
||||
try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
|
||||
if (comp.whole_cache_manifest) |whole_cache_manifest| {
|
||||
comp.whole_cache_manifest_mutex.lock();
|
||||
defer comp.whole_cache_manifest_mutex.unlock();
|
||||
try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
|
||||
}
|
||||
// Just to save disk space, we delete the file because it is never needed again.
|
||||
zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
|
||||
log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) });
|
||||
};
|
||||
|
||||
var full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) {
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
else => |e| {
|
||||
return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) });
|
||||
},
|
||||
};
|
||||
|
||||
var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = win32_resource.src.src_path });
|
||||
defer mapping_results.mappings.deinit(arena);
|
||||
|
||||
var final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings);
|
||||
|
||||
var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| {
|
||||
return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) });
|
||||
};
|
||||
var output_file_closed = false;
|
||||
defer if (!output_file_closed) output_file.close();
|
||||
|
||||
var diagnostics = resinator.errors.Diagnostics.init(arena);
|
||||
defer diagnostics.deinit();
|
||||
|
||||
var dependencies_list = std.ArrayList([]const u8).init(comp.gpa);
|
||||
defer {
|
||||
for (dependencies_list.items) |item| {
|
||||
comp.gpa.free(item);
|
||||
}
|
||||
dependencies_list.deinit();
|
||||
}
|
||||
|
||||
var output_buffered_stream = std.io.bufferedWriter(output_file.writer());
|
||||
|
||||
resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{
|
||||
.cwd = std.fs.cwd(),
|
||||
.diagnostics = &diagnostics,
|
||||
.source_mappings = &mapping_results.mappings,
|
||||
.dependencies_list = &dependencies_list,
|
||||
.system_include_paths = comp.rc_include_dir_list,
|
||||
.ignore_include_env_var = true,
|
||||
// options
|
||||
.extra_include_paths = options.extra_include_paths.items,
|
||||
.default_language_id = options.default_language_id,
|
||||
.default_code_page = options.default_code_page orelse .windows1252,
|
||||
.verbose = options.verbose,
|
||||
.null_terminate_string_table_strings = options.null_terminate_string_table_strings,
|
||||
.max_string_literal_codepoints = options.max_string_literal_codepoints,
|
||||
.silent_duplicate_control_ids = options.silent_duplicate_control_ids,
|
||||
.warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
|
||||
}) catch |err| switch (err) {
|
||||
error.ParseError, error.CompileError => {
|
||||
// Delete the output file on error
|
||||
output_file.close();
|
||||
output_file_closed = true;
|
||||
// Failing to delete is not really a big deal, so swallow any errors
|
||||
zig_cache_tmp_dir.deleteFile(out_res_path) catch {
|
||||
log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(err) });
|
||||
};
|
||||
return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings);
|
||||
},
|
||||
else => |e| return e,
|
||||
};
|
||||
|
||||
try output_buffered_stream.flush();
|
||||
|
||||
for (dependencies_list.items) |dep_file_path| {
|
||||
try man.addFilePost(dep_file_path);
|
||||
if (comp.whole_cache_manifest) |whole_cache_manifest| {
|
||||
comp.whole_cache_manifest_mutex.lock();
|
||||
defer comp.whole_cache_manifest_mutex.unlock();
|
||||
try whole_cache_manifest.addFilePost(dep_file_path);
|
||||
}
|
||||
}
|
||||
|
||||
// Rename into place.
|
||||
const digest = man.final();
|
||||
const o_sub_path = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest });
|
||||
var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{});
|
||||
defer o_dir.close();
|
||||
const tmp_basename = std.fs.path.basename(out_res_path);
|
||||
try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename);
|
||||
const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path);
|
||||
try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename);
|
||||
break :blk digest;
|
||||
};
|
||||
|
||||
if (man.have_exclusive_lock) {
|
||||
// Write the updated manifest. This is a no-op if the manifest is not dirty. Note that it is
|
||||
// possible we had a hit and the manifest is dirty, for example if the file mtime changed but
|
||||
// the contents were the same, we hit the cache but the manifest is dirty and we need to update
|
||||
// it to prevent doing a full file content comparison the next time around.
|
||||
man.writeManifest() catch |err| {
|
||||
log.warn("failed to write cache manifest when compiling '{s}': {s}", .{ win32_resource.src.src_path, @errorName(err) });
|
||||
};
|
||||
}
|
||||
|
||||
const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext});
|
||||
|
||||
win32_resource.status = .{
|
||||
.success = .{
|
||||
.res_path = try comp.local_cache_directory.join(comp.gpa, &[_][]const u8{
|
||||
"o", &digest, res_basename,
|
||||
}),
|
||||
.lock = man.toOwnedLock(),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 {
|
||||
const s = std.fs.path.sep_str;
|
||||
const rand_int = std.crypto.random.int(u64);
|
||||
@ -4350,7 +4839,7 @@ pub fn addCCArgs(
|
||||
try argv.appendSlice(&[_][]const u8{ "-target", llvm_triple });
|
||||
|
||||
switch (ext) {
|
||||
.c, .cpp, .m, .mm, .h, .cu => {
|
||||
.c, .cpp, .m, .mm, .h, .cu, .rc => {
|
||||
try argv.appendSlice(&[_][]const u8{
|
||||
"-nostdinc",
|
||||
"-fno-spell-checking",
|
||||
@ -4378,9 +4867,16 @@ pub fn addCCArgs(
|
||||
try argv.append("-isystem");
|
||||
try argv.append(c_headers_dir);
|
||||
|
||||
for (comp.libc_include_dir_list) |include_dir| {
|
||||
try argv.append("-isystem");
|
||||
try argv.append(include_dir);
|
||||
if (ext == .rc) {
|
||||
for (comp.rc_include_dir_list) |include_dir| {
|
||||
try argv.append("-isystem");
|
||||
try argv.append(include_dir);
|
||||
}
|
||||
} else {
|
||||
for (comp.libc_include_dir_list) |include_dir| {
|
||||
try argv.append("-isystem");
|
||||
try argv.append(include_dir);
|
||||
}
|
||||
}
|
||||
|
||||
if (target.cpu.model.llvm_name) |llvm_name| {
|
||||
@ -4692,6 +5188,253 @@ fn failCObjWithOwnedErrorMsg(
|
||||
return error.AnalysisFail;
|
||||
}
|
||||
|
||||
/// Picks the include directories used when preprocessing .rc files. These are
/// chosen independently of the compilation target and are controlled by
/// `options.rc_includes`:
///   - `.none` (also forced when there are no .rc files): empty directory list.
///   - `.msvc`: MSVC include paths; any detection failure is returned.
///   - `.gnu`:  MinGW include paths obtained via `detectLibCFromBuilding`.
///   - `.any`:  try MSVC first and fall back to MinGW if that fails.
/// On hosts other than Windows, MSVC detection is never attempted: `.msvc`
/// fails with `error.WindowsSdkNotFound` and `.any` goes straight to MinGW.
///
/// Note: It should be okay that the include directories used when compiling .rc
/// files differ from the include directories used when compiling the main
/// binary, since the .res format is not dependent on anything ABI-related. The
/// only relevant differences would be things like `#define` constants being
/// different in the MinGW headers vs the MSVC headers, but any such
/// differences would likely be a MinGW bug.
fn detectWin32ResourceIncludeDirs(arena: Allocator, options: InitOptions) !LibCDirs {
    // Without any rc files to compile there is nothing to detect.
    var mode = if (options.rc_source_files.len > 0) options.rc_includes else .none;

    if (builtin.target.os.tag != .windows) {
        // MSVC can't be found when the host isn't Windows, so short-circuit.
        if (mode == .msvc) return error.WindowsSdkNotFound;
        // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
        if (mode == .any) mode = .gnu;
    }

    if (mode == .none) {
        return LibCDirs{
            .libc_include_dir_list = &[0][]u8{},
            .libc_installation = null,
            .libc_framework_dir_list = &.{},
            .sysroot = null,
        };
    }

    if (mode != .gnu) {
        // `mode` is `.any` or `.msvc` here: attempt MSVC detection.
        if (detectLibCIncludeDirs(
            arena,
            options.zig_lib_directory.path.?,
            .{
                .cpu = options.target.cpu,
                .os = options.target.os,
                .abi = .msvc,
                .ofmt = options.target.ofmt,
            },
            options.is_native_abi,
            // The .rc preprocessor will need to know the libc include dirs even if we
            // are not linking libc, so force 'link_libc' to true
            true,
            options.libc_installation,
        )) |msvc_dirs| {
            return msvc_dirs;
        } else |err| {
            // Only `.any` is allowed to fall back to mingw.
            if (mode == .msvc) return err;
        }
    }

    return detectLibCFromBuilding(arena, options.zig_lib_directory.path.?, .{
        .cpu = options.target.cpu,
        .os = options.target.os,
        .abi = .gnu,
        .ofmt = options.target.ofmt,
    });
}
|
||||
|
||||
/// Records a single formatted error message against `win32_resource` and fails.
///
/// The message is rendered from `format`/`args` and attached to the resource's
/// source path with a zeroed line/column/span. The finished bundle is handed to
/// `failWin32ResourceWithOwnedBundle`, whose error is returned; errors hit while
/// building the bundle are returned as well.
fn failWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, comptime format: []const u8, args: anytype) SemaError {
    @setCold(true);

    var wip: ErrorBundle.Wip = undefined;
    try wip.init(comp.gpa);
    errdefer wip.deinit();

    // Built in the same order as the nested literal would evaluate them:
    // message text first, then the path string, then the location record.
    const rendered_msg = try wip.printString(format, args);
    const path_str = try wip.addString(win32_resource.src.src_path);
    const location = try wip.addSourceLocation(.{
        .src_path = path_str,
        .line = 0,
        .column = 0,
        .span_start = 0,
        .span_main = 0,
        .span_end = 0,
    });
    try wip.addRootErrorMessage(.{
        .msg = rendered_msg,
        .src_loc = location,
    });

    const owned = try wip.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, owned);
}
|
||||
|
||||
/// Stores `err_bundle` as the failure for `win32_resource` and marks the
/// resource as failed.
///
/// The insertion into `comp.failed_win32_resources` happens while holding
/// `comp.mutex`. If the insertion fails, that error is returned and the
/// resource status is left untouched; otherwise the status becomes `.failure`
/// and `error.AnalysisFail` is returned.
fn failWin32ResourceWithOwnedBundle(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    err_bundle: ErrorBundle,
) SemaError {
    @setCold(true);

    // Hold the lock only for the table insertion, releasing it before the
    // insertion result is inspected.
    comp.mutex.lock();
    const put_result = comp.failed_win32_resources.putNoClobber(comp.gpa, win32_resource, err_bundle);
    comp.mutex.unlock();
    try put_result;

    win32_resource.status = .failure;
    return error.AnalysisFail;
}
|
||||
|
||||
/// Reports invalid .rc command line options as a failure of `win32_resource`.
///
/// The bundle starts with a generic "invalid command line option(s)" root
/// error located at the resource's source path (zeroed line/column/span).
/// Each `.err` entry of `diagnostics` then becomes its own root error message,
/// and the `.note` entries that directly follow it are attached as its notes.
/// `.warning` entries are not reported, and notes following a warning are
/// dropped so that they cannot be attributed to an unrelated error.
///
/// Returns the error produced by `failWin32ResourceWithOwnedBundle`, or any
/// error encountered while building the bundle.
fn failWin32ResourceCli(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    diagnostics: *resinator.cli.Diagnostics,
) SemaError {
    @setCold(true);

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    errdefer bundle.deinit();

    try bundle.addRootErrorMessage(.{
        .msg = try bundle.addString("invalid command line option(s)"),
        .src_loc = try bundle.addSourceLocation(.{
            .src_path = try bundle.addString(win32_resource.src.src_path),
            .line = 0,
            .column = 0,
            .span_start = 0,
            .span_main = 0,
            .span_end = 0,
        }),
    });

    // `cur_err` is the error currently collecting notes; it is only written to
    // the bundle ("flushed") once the next error/warning or the end of the
    // list is reached, because the number of notes must be known up front.
    var cur_err: ?ErrorBundle.ErrorMessage = null;
    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
    defer cur_notes.deinit(comp.gpa);
    for (diagnostics.errors.items) |err_details| {
        switch (err_details.type) {
            .err => {
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = .{
                    .msg = try bundle.addString(err_details.msg.items),
                };
                cur_notes.clearRetainingCapacity();
            },
            .warning => {
                // A warning ends the pending error's run of notes. Flush the
                // pending error first: merely clearing it would silently drop
                // an error that happens to be followed by a warning.
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = null;
                cur_notes.clearRetainingCapacity();
            },
            .note => {
                // Notes without a pending error belong to a warning; skip them.
                if (cur_err) |*err| {
                    err.notes_len += 1;
                    try cur_notes.append(comp.gpa, .{
                        .msg = try bundle.addString(err_details.msg.items),
                    });
                }
            },
        }
    }
    if (cur_err) |err| {
        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
    }

    const finished_bundle = try bundle.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
}
|
||||
|
||||
/// Converts resinator compile/parse diagnostics into an error bundle and
/// records it as the failure of `win32_resource`.
///
/// `source` is the text the diagnostic tokens refer to, and `mappings` is used
/// to translate a token's line number in `source` back to the originating
/// file and line. Each `.err` entry becomes a root error message with the
/// `.note` entries that follow it attached as notes. `.hint` and `.warning`
/// entries are not reported, and notes that follow a warning are dropped so
/// that they do not bleed into unassociated errors.
///
/// Returns the error produced by `failWin32ResourceWithOwnedBundle`, or any
/// error encountered while building the bundle.
fn failWin32ResourceCompile(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    source: []const u8,
    diagnostics: *resinator.errors.Diagnostics,
    mappings: resinator.source_mapping.SourceMappings,
) SemaError {
    @setCold(true);

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    errdefer bundle.deinit();

    // Scratch buffer reused for rendering each diagnostic's message text.
    var msg_buf: std.ArrayListUnmanaged(u8) = .{};
    defer msg_buf.deinit(comp.gpa);
    // `cur_err` is the error currently collecting notes; it is written to the
    // bundle only once its full set of notes is known.
    var cur_err: ?ErrorBundle.ErrorMessage = null;
    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
    defer cur_notes.deinit(comp.gpa);
    for (diagnostics.errors.items) |err_details| {
        switch (err_details.type) {
            .hint => continue,
            // A warning ends the pending error's run of notes so that notes
            // don't bleed into unassociated errors. The pending error is
            // flushed first; only clearing it would drop the error entirely.
            .warning => {
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = null;
                cur_notes.clearRetainingCapacity();
                continue;
            },
            .note => if (cur_err == null) continue,
            .err => {},
        }
        const corresponding_span = mappings.get(err_details.token.line_number);
        const corresponding_file = mappings.files.get(corresponding_span.filename_offset);

        const source_line_start = err_details.token.getLineStart(source);
        const column = err_details.token.calculateColumn(source, 1, source_line_start);
        const err_line = corresponding_span.start_line;

        msg_buf.clearRetainingCapacity();
        try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items);

        const src_loc = src_loc: {
            var src_loc: ErrorBundle.SourceLocation = .{
                .src_path = try bundle.addString(corresponding_file),
                .line = @intCast(err_line - 1), // 1-based -> 0-based
                .column = @intCast(column),
                .span_start = 0,
                .span_main = 0,
                .span_end = 0,
            };
            if (err_details.print_source_line) {
                const source_line = err_details.token.getLine(source, source_line_start);
                const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
                src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len);
                src_loc.span_main = @intCast(visual_info.point_offset);
                src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len);
                src_loc.source_line = try bundle.addString(source_line);
            }
            break :src_loc try bundle.addSourceLocation(src_loc);
        };

        switch (err_details.type) {
            .err => {
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = .{
                    .msg = try bundle.addString(msg_buf.items),
                    .src_loc = src_loc,
                };
                cur_notes.clearRetainingCapacity();
            },
            .note => {
                // Non-null here: notes without a pending error were skipped above.
                cur_err.?.notes_len += 1;
                try cur_notes.append(comp.gpa, .{
                    .msg = try bundle.addString(msg_buf.items),
                    .src_loc = src_loc,
                });
            },
            // Both were filtered out by the first switch.
            .warning, .hint => unreachable,
        }
    }
    if (cur_err) |err| {
        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
    }

    const finished_bundle = try bundle.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
}
|
||||
|
||||
/// Writes `msg` to `wip` as a root error message followed by its `notes`.
///
/// Space for `notes.len` note slots is reserved first; each note is then added
/// and its index stored into the corresponding reserved slot of `wip.extra`.
fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void {
    try wip.addRootErrorMessage(msg);

    const first_slot = try wip.reserveNotes(@intCast(notes.len));
    for (notes, 0..) |note, offset| {
        const note_index = wip.addErrorMessageAssumeCapacity(note);
        wip.extra.items[first_slot + offset] = @intFromEnum(note_index);
    }
}
|
||||
|
||||
pub const FileExt = enum {
|
||||
c,
|
||||
cpp,
|
||||
@ -4708,6 +5451,7 @@ pub const FileExt = enum {
|
||||
static_library,
|
||||
zig,
|
||||
def,
|
||||
rc,
|
||||
res,
|
||||
unknown,
|
||||
|
||||
@ -4724,6 +5468,7 @@ pub const FileExt = enum {
|
||||
.static_library,
|
||||
.zig,
|
||||
.def,
|
||||
.rc,
|
||||
.res,
|
||||
.unknown,
|
||||
=> false,
|
||||
@ -4747,6 +5492,7 @@ pub const FileExt = enum {
|
||||
.static_library => target.staticLibSuffix(),
|
||||
.zig => ".zig",
|
||||
.def => ".def",
|
||||
.rc => ".rc",
|
||||
.res => ".res",
|
||||
.unknown => "",
|
||||
};
|
||||
@ -4839,7 +5585,9 @@ pub fn classifyFileExt(filename: []const u8) FileExt {
|
||||
return .cu;
|
||||
} else if (mem.endsWith(u8, filename, ".def")) {
|
||||
return .def;
|
||||
} else if (mem.endsWith(u8, filename, ".res")) {
|
||||
} else if (std.ascii.endsWithIgnoreCase(filename, ".rc")) {
|
||||
return .rc;
|
||||
} else if (std.ascii.endsWithIgnoreCase(filename, ".res")) {
|
||||
return .res;
|
||||
} else {
|
||||
return .unknown;
|
||||
@ -4983,6 +5731,13 @@ fn detectLibCFromLibCInstallation(arena: Allocator, target: Target, lci: *const
|
||||
if (!is_redundant) list.appendAssumeCapacity(lci.sys_include_dir.?);
|
||||
|
||||
if (target.os.tag == .windows) {
|
||||
if (std.fs.path.dirname(lci.sys_include_dir.?)) |sys_include_dir_parent| {
|
||||
// This include path will only exist when the optional "Desktop development with C++"
|
||||
// is installed. It contains headers, .rc files, and resources. It is especially
|
||||
// necessary when working with Windows resources.
|
||||
const atlmfc_dir = try std.fs.path.join(arena, &[_][]const u8{ sys_include_dir_parent, "atlmfc", "include" });
|
||||
list.appendAssumeCapacity(atlmfc_dir);
|
||||
}
|
||||
if (std.fs.path.dirname(lci.include_dir.?)) |include_dir_parent| {
|
||||
const um_dir = try std.fs.path.join(arena, &[_][]const u8{ include_dir_parent, "um" });
|
||||
list.appendAssumeCapacity(um_dir);
|
||||
|
||||
@ -1027,6 +1027,9 @@ pub const File = struct {
|
||||
for (comp.c_object_table.keys()) |key| {
|
||||
_ = try man.addFile(key.status.success.object_path, null);
|
||||
}
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
_ = try man.addFile(key.status.success.res_path, null);
|
||||
}
|
||||
try man.addOptionalFile(module_obj_path);
|
||||
try man.addOptionalFile(compiler_rt_path);
|
||||
|
||||
@ -1056,7 +1059,7 @@ pub const File = struct {
|
||||
};
|
||||
}
|
||||
|
||||
const num_object_files = base.options.objects.len + comp.c_object_table.count() + 2;
|
||||
const num_object_files = base.options.objects.len + comp.c_object_table.count() + comp.win32_resource_table.count() + 2;
|
||||
var object_files = try std.ArrayList([*:0]const u8).initCapacity(base.allocator, num_object_files);
|
||||
defer object_files.deinit();
|
||||
|
||||
@ -1066,6 +1069,9 @@ pub const File = struct {
|
||||
for (comp.c_object_table.keys()) |key| {
|
||||
object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.object_path));
|
||||
}
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.res_path));
|
||||
}
|
||||
if (module_obj_path) |p| {
|
||||
object_files.appendAssumeCapacity(try arena.dupeZ(u8, p));
|
||||
}
|
||||
|
||||
@ -72,6 +72,9 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
|
||||
for (comp.c_object_table.keys()) |key| {
|
||||
_ = try man.addFile(key.status.success.object_path, null);
|
||||
}
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
_ = try man.addFile(key.status.success.res_path, null);
|
||||
}
|
||||
try man.addOptionalFile(module_obj_path);
|
||||
man.hash.addOptionalBytes(self.base.options.entry);
|
||||
man.hash.addOptional(self.base.options.stack_size_override);
|
||||
@ -268,6 +271,10 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
|
||||
try argv.append(key.status.success.object_path);
|
||||
}
|
||||
|
||||
for (comp.win32_resource_table.keys()) |key| {
|
||||
try argv.append(key.status.success.res_path);
|
||||
}
|
||||
|
||||
if (module_obj_path) |p| {
|
||||
try argv.append(p);
|
||||
}
|
||||
|
||||
70
src/main.zig
70
src/main.zig
@ -472,6 +472,12 @@ const usage_build_generic =
|
||||
\\ -D[macro]=[value] Define C [macro] to [value] (1 if [value] omitted)
|
||||
\\ --libc [file] Provide a file which specifies libc paths
|
||||
\\ -cflags [flags] -- Set extra flags for the next positional C source files
|
||||
\\ -rcflags [flags] -- Set extra flags for the next positional .rc source files
|
||||
\\ -rcincludes=[type] Set the type of includes to use when compiling .rc source files
|
||||
\\ any (default) Use msvc if available, fall back to gnu
|
||||
\\ msvc Use msvc include paths (must be present on the system)
|
||||
\\ gnu Use mingw include paths (distributed with Zig)
|
||||
\\ none Do not use any autodetected include paths
|
||||
\\
|
||||
\\Link Options:
|
||||
\\ -l[lib], --library [lib] Link against system library (only if actually used)
|
||||
@ -919,11 +925,15 @@ fn buildOutputType(
|
||||
var wasi_emulated_libs = std.ArrayList(wasi_libc.CRTFile).init(arena);
|
||||
var clang_argv = std.ArrayList([]const u8).init(arena);
|
||||
var extra_cflags = std.ArrayList([]const u8).init(arena);
|
||||
var extra_rcflags = std.ArrayList([]const u8).init(arena);
|
||||
// These are before resolving sysroot.
|
||||
var lib_dir_args = std.ArrayList([]const u8).init(arena);
|
||||
var rpath_list = std.ArrayList([]const u8).init(arena);
|
||||
var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{};
|
||||
var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena);
|
||||
var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena);
|
||||
var rc_includes: Compilation.RcIncludes = .any;
|
||||
var res_files = std.ArrayList(Compilation.LinkObject).init(arena);
|
||||
var link_objects = std.ArrayList(Compilation.LinkObject).init(arena);
|
||||
var framework_dirs = std.ArrayList([]const u8).init(arena);
|
||||
var frameworks: std.StringArrayHashMapUnmanaged(Framework) = .{};
|
||||
@ -1042,6 +1052,19 @@ fn buildOutputType(
|
||||
if (mem.eql(u8, next_arg, "--")) break;
|
||||
try extra_cflags.append(next_arg);
|
||||
}
|
||||
} else if (mem.eql(u8, arg, "-rcincludes")) {
|
||||
rc_includes = parseRcIncludes(args_iter.nextOrFatal());
|
||||
} else if (mem.startsWith(u8, arg, "-rcincludes=")) {
|
||||
rc_includes = parseRcIncludes(arg["-rcincludes=".len..]);
|
||||
} else if (mem.eql(u8, arg, "-rcflags")) {
|
||||
extra_rcflags.shrinkRetainingCapacity(0);
|
||||
while (true) {
|
||||
const next_arg = args_iter.next() orelse {
|
||||
fatal("expected -- after -rcflags", .{});
|
||||
};
|
||||
if (mem.eql(u8, next_arg, "--")) break;
|
||||
try extra_rcflags.append(next_arg);
|
||||
}
|
||||
} else if (mem.eql(u8, arg, "--color")) {
|
||||
const next_arg = args_iter.next() orelse {
|
||||
fatal("expected [auto|on|off] after --color", .{});
|
||||
@ -1590,7 +1613,8 @@ fn buildOutputType(
|
||||
}
|
||||
} else switch (file_ext orelse
|
||||
Compilation.classifyFileExt(arg)) {
|
||||
.object, .static_library, .shared_library, .res => try link_objects.append(.{ .path = arg }),
|
||||
.object, .static_library, .shared_library => try link_objects.append(.{ .path = arg }),
|
||||
.res => try res_files.append(.{ .path = arg }),
|
||||
.assembly, .assembly_with_cpp, .c, .cpp, .h, .ll, .bc, .m, .mm, .cu => {
|
||||
try c_source_files.append(.{
|
||||
.src_path = arg,
|
||||
@ -1599,6 +1623,12 @@ fn buildOutputType(
|
||||
.ext = file_ext,
|
||||
});
|
||||
},
|
||||
.rc => {
|
||||
try rc_source_files.append(.{
|
||||
.src_path = arg,
|
||||
.extra_flags = try arena.dupe([]const u8, extra_rcflags.items),
|
||||
});
|
||||
},
|
||||
.zig => {
|
||||
if (root_src_file) |other| {
|
||||
fatal("found another zig file '{s}' after root source file '{s}'", .{ arg, other });
|
||||
@ -1684,13 +1714,20 @@ fn buildOutputType(
|
||||
.ext = file_ext, // duped while parsing the args.
|
||||
});
|
||||
},
|
||||
.unknown, .shared_library, .object, .static_library, .res => try link_objects.append(.{
|
||||
.unknown, .shared_library, .object, .static_library => try link_objects.append(.{
|
||||
.path = it.only_arg,
|
||||
.must_link = must_link,
|
||||
}),
|
||||
.res => try res_files.append(.{
|
||||
.path = it.only_arg,
|
||||
.must_link = must_link,
|
||||
}),
|
||||
.def => {
|
||||
linker_module_definition_file = it.only_arg;
|
||||
},
|
||||
.rc => {
|
||||
try rc_source_files.append(.{ .src_path = it.only_arg });
|
||||
},
|
||||
.zig => {
|
||||
if (root_src_file) |other| {
|
||||
fatal("found another zig file '{s}' after root source file '{s}'", .{ it.only_arg, other });
|
||||
@ -2452,6 +2489,12 @@ fn buildOutputType(
|
||||
} else if (emit_bin == .yes) {
|
||||
const basename = fs.path.basename(emit_bin.yes);
|
||||
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
|
||||
} else if (rc_source_files.items.len >= 1) {
|
||||
const basename = fs.path.basename(rc_source_files.items[0].src_path);
|
||||
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
|
||||
} else if (res_files.items.len >= 1) {
|
||||
const basename = fs.path.basename(res_files.items[0].path);
|
||||
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
|
||||
} else if (show_builtin) {
|
||||
break :blk "builtin";
|
||||
} else if (arg_mode == .run) {
|
||||
@ -2530,6 +2573,21 @@ fn buildOutputType(
|
||||
link_libcpp = true;
|
||||
}
|
||||
|
||||
if (target_info.target.ofmt == .coff) {
|
||||
// Now that we know the target supports resources,
|
||||
// we can add the res files as link objects.
|
||||
for (res_files.items) |res_file| {
|
||||
try link_objects.append(res_file);
|
||||
}
|
||||
} else {
|
||||
if (rc_source_files.items.len != 0) {
|
||||
fatal("rc files are not allowed unless the target object format is coff (Windows/UEFI)", .{});
|
||||
}
|
||||
if (res_files.items.len != 0) {
|
||||
fatal("res files are not allowed unless the target object format is coff (Windows/UEFI)", .{});
|
||||
}
|
||||
}
|
||||
|
||||
if (target_info.target.cpu.arch.isWasm()) blk: {
|
||||
if (single_threaded == null) {
|
||||
single_threaded = true;
|
||||
@ -2933,6 +2991,7 @@ fn buildOutputType(
|
||||
if (output_mode == .Obj and (object_format == .coff or object_format == .macho)) {
|
||||
const total_obj_count = c_source_files.items.len +
|
||||
@intFromBool(root_src_file != null) +
|
||||
rc_source_files.items.len +
|
||||
link_objects.items.len;
|
||||
if (total_obj_count > 1) {
|
||||
fatal("{s} does not support linking multiple objects into one", .{@tagName(object_format)});
|
||||
@ -3319,6 +3378,8 @@ fn buildOutputType(
|
||||
.rpath_list = rpath_list.items,
|
||||
.symbol_wrap_set = symbol_wrap_set,
|
||||
.c_source_files = c_source_files.items,
|
||||
.rc_source_files = rc_source_files.items,
|
||||
.rc_includes = rc_includes,
|
||||
.link_objects = link_objects.items,
|
||||
.framework_dirs = framework_dirs.items,
|
||||
.frameworks = resolved_frameworks.items,
|
||||
@ -6478,3 +6539,8 @@ fn accessFrameworkPath(
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Maps the textual value of `-rcincludes` onto `Compilation.RcIncludes`.
/// A value that names no enum member is handed to `fatal` together with the
/// offending text.
fn parseRcIncludes(arg: []const u8) Compilation.RcIncludes {
    if (std.meta.stringToEnum(Compilation.RcIncludes, arg)) |kind| {
        return kind;
    }
    fatal("unsupported rc includes type: '{s}'", .{arg});
}
|
||||
|
||||
18
src/resinator.zig
Normal file
18
src/resinator.zig
Normal file
@ -0,0 +1,18 @@
|
||||
pub const ani = @import("resinator/ani.zig");
|
||||
pub const ast = @import("resinator/ast.zig");
|
||||
pub const bmp = @import("resinator/bmp.zig");
|
||||
pub const cli = @import("resinator/cli.zig");
|
||||
pub const code_pages = @import("resinator/code_pages.zig");
|
||||
pub const comments = @import("resinator/comments.zig");
|
||||
pub const compile = @import("resinator/compile.zig");
|
||||
pub const errors = @import("resinator/errors.zig");
|
||||
pub const ico = @import("resinator/ico.zig");
|
||||
pub const lang = @import("resinator/lang.zig");
|
||||
pub const lex = @import("resinator/lex.zig");
|
||||
pub const literals = @import("resinator/literals.zig");
|
||||
pub const parse = @import("resinator/parse.zig");
|
||||
pub const rc = @import("resinator/rc.zig");
|
||||
pub const res = @import("resinator/res.zig");
|
||||
pub const source_mapping = @import("resinator/source_mapping.zig");
|
||||
pub const utils = @import("resinator/utils.zig");
|
||||
pub const windows1252 = @import("resinator/windows1252.zig");
|
||||
58
src/resinator/ani.zig
Normal file
58
src/resinator/ani.zig
Normal file
@ -0,0 +1,58 @@
|
||||
//! https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
|
||||
//! https://www.moon-soft.com/program/format/windows/ani.htm
|
||||
//! https://www.gdgsoft.com/anituner/help/aniformat.htm
|
||||
//! https://www.lomont.org/software/aniexploit/ExploitANI.pdf
|
||||
//!
|
||||
//! RIFF( 'ACON'
|
||||
//! [LIST( 'INFO' <info_data> )]
|
||||
//! [<DISP_ck>]
|
||||
//! anih( <ani_header> )
|
||||
//! [rate( <rate_info> )]
|
||||
//! ['seq '( <sequence_info> )]
|
||||
//! LIST( 'fram' icon( <icon_file> ) ... )
|
||||
//! )
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const AF_ICON: u32 = 1;
|
||||
|
||||
/// Returns true when the data behind `reader` has an `anih` header whose
/// flags have the `AF_ICON` bit set. Any failure to read or locate that
/// header is treated as "not an animated icon".
pub fn isAnimatedIcon(reader: anytype) bool {
    if (getAniheaderFlags(reader)) |flags| {
        // AF_ICON is a single bit, so a non-zero mask means it is set.
        return (flags & AF_ICON) != 0;
    } else |_| {
        return false;
    }
}
|
||||
|
||||
/// Reads a RIFF stream of form type `ACON` from `reader`, skips chunks until
/// the `anih` chunk is found, and returns that header's `flags` field.
///
/// Returns `error.InvalidFormat` when the stream does not start with `RIFF`
/// or its form type is not `ACON`; reader errors (including running out of
/// data before an `anih` chunk is seen) are propagated.
fn getAniheaderFlags(reader: anytype) !u32 {
    const magic = try reader.readBytesNoEof(4);
    if (!std.mem.eql(u8, &magic, "RIFF")) return error.InvalidFormat;

    // Size of the RIFF chunk; read only to advance past it.
    _ = try reader.readIntLittle(u32);

    const form = try reader.readBytesNoEof(4);
    if (!std.mem.eql(u8, &form, "ACON")) return error.InvalidFormat;

    var chunk_id = try reader.readBytesNoEof(4);
    var chunk_len = try reader.readIntLittle(u32);
    while (!std.mem.eql(u8, &chunk_id, "anih")) {
        // TODO: Move file cursor instead of skipBytes
        try reader.skipBytes(chunk_len, .{});
        chunk_id = try reader.readBytesNoEof(4);
        chunk_len = try reader.readIntLittle(u32);
    }

    // The struct is filled from the raw bytes of the stream, so its fields
    // hold little-endian data that must be converted to host order.
    const header = try reader.readStruct(ANIHEADER);
    return std.mem.littleToNative(u32, header.flags);
}
|
||||
|
||||
/// From Microsoft Multimedia Data Standards Update April 15, 1994
///
/// Payload of the `anih` chunk. It is filled directly from the input stream
/// with `readStruct`, so it is declared `extern` and the field order must not
/// be changed. Field values are left in the byte order of the file.
const ANIHEADER = extern struct {
    cbSizeof: u32,
    cFrames: u32,
    cSteps: u32,
    cx: u32,
    cy: u32,
    cBitCount: u32,
    cPlanes: u32,
    jifRate: u32,
    /// The only field consulted in this file; its `AF_ICON` bit is tested by
    /// `isAnimatedIcon`.
    flags: u32,
};
|
||||
1084
src/resinator/ast.zig
Normal file
1084
src/resinator/ast.zig
Normal file
File diff suppressed because it is too large
Load Diff
268
src/resinator/bmp.zig
Normal file
268
src/resinator/bmp.zig
Normal file
@ -0,0 +1,268 @@
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
|
||||
//! https://learn.microsoft.com/en-us/previous-versions//dd183376(v=vs.85)
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfo
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader
|
||||
//! https://archive.org/details/mac_Graphics_File_Formats_Second_Edition_1996/page/n607/mode/2up
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapv5header
|
||||
//!
|
||||
//! Notes:
|
||||
//! - The Microsoft documentation is incredibly unclear about the color table when the
|
||||
//! bit depth is >= 16.
|
||||
//! + For bit depth 24 it says "the bmiColors member of BITMAPINFO is NULL" but also
|
||||
//! says "the bmiColors color table is used for optimizing colors used on palette-based
|
||||
//! devices, and must contain the number of entries specified by the bV5ClrUsed member"
|
||||
//! + For bit depth 16 and 32, it seems to imply that if the compression is BI_BITFIELDS
|
||||
//! or BI_ALPHABITFIELDS, then the color table *only* consists of the bit masks, but
|
||||
//! doesn't really say this outright and the Wikipedia article seems to disagree
|
||||
//! For the purposes of this implementation, color tables can always be present for any
|
||||
//! bit depth and compression, and the color table follows the header + any optional
|
||||
//! bit mask fields dictated by the specified compression.
|
||||
|
||||
const std = @import("std");
|
||||
const BitmapHeader = @import("ico.zig").BitmapHeader;
|
||||
|
||||
pub const windows_format_id = std.mem.readIntNative(u16, "BM");
|
||||
pub const file_header_len = 14;
|
||||
|
||||
/// Errors that `read` can return while inspecting a bitmap file.
pub const ReadError = error{
    /// The reader ran out of data before a required header field was read.
    UnexpectedEOF,
    /// The file does not start with the `BM` identifier.
    InvalidFileHeader,
    /// The pixel data offset lies beyond the maximum size given to `read`, or
    /// before the end of the file header plus the DIB header.
    ImpossiblePixelDataOffset,
    UnknownBitmapVersion,
    InvalidBitsPerPixel,
    TooManyColorsInPalette,
    MissingBitfieldMasks,
};
|
||||
|
||||
/// Size and offset information gathered from a bitmap file's headers, plus
/// helpers that derive the byte lengths of the regions between the headers
/// and the pixel data.
///
/// The subtracting helpers use plain (checked) arithmetic and therefore rely
/// on the offsets being consistent with each other.
pub const BitmapInfo = struct {
    dib_header_size: u32,
    /// Contains the interpreted number of colors in the palette (e.g.
    /// if the field's value is zero and the bit depth is <= 8, this
    /// will contain the maximum number of colors for the bit depth
    /// rather than the field's value directly).
    colors_in_palette: u32,
    bytes_per_color_palette_element: u8,
    pixel_data_offset: u32,
    compression: Compression,

    /// Byte length the palette should have: one element per palette color.
    pub fn getExpectedPaletteByteLen(self: *const BitmapInfo) u64 {
        const color_count: u64 = self.colors_in_palette;
        return color_count * self.bytes_per_color_palette_element;
    }

    /// Byte length actually available for the palette: everything between
    /// the headers and the pixel data, minus the bitmask fields.
    pub fn getActualPaletteByteLen(self: *const BitmapInfo) u64 {
        const gap = self.getByteLenBetweenHeadersAndPixels();
        return gap - self.getBitmasksByteLen();
    }

    /// Number of bytes between the end of the file header + DIB header and
    /// the start of the pixel data.
    pub fn getByteLenBetweenHeadersAndPixels(self: *const BitmapInfo) u64 {
        const headers_len = @as(u64, self.dib_header_size) + file_header_len;
        return @as(u64, self.pixel_data_offset) - headers_len;
    }

    /// Byte length of the bitmask fields implied by the compression:
    /// 12 for `BI_BITFIELDS`, 16 for `BI_ALPHABITFIELDS`, otherwise 0.
    pub fn getBitmasksByteLen(self: *const BitmapInfo) u8 {
        if (self.compression == .BI_BITFIELDS) return 12;
        if (self.compression == .BI_ALPHABITFIELDS) return 16;
        return 0;
    }

    /// How many palette bytes are missing compared to the expected length,
    /// or 0 when the actual palette is at least as long as expected.
    pub fn getMissingPaletteByteLen(self: *const BitmapInfo) u64 {
        return self.getExpectedPaletteByteLen() -| self.getActualPaletteByteLen();
    }

    /// Returns the full byte len of the DIB header + optional bitmasks + color palette
    pub fn getExpectedByteLenBeforePixelData(self: *const BitmapInfo) u64 {
        const header_and_masks = @as(u64, self.dib_header_size) + self.getBitmasksByteLen();
        return header_and_masks + self.getExpectedPaletteByteLen();
    }

    /// Returns the full expected byte len
    pub fn getExpectedByteLen(self: *const BitmapInfo, file_size: u64) u64 {
        const before_pixels = self.getExpectedByteLenBeforePixelData();
        return before_pixels + self.getPixelDataLen(file_size);
    }

    /// Byte length of the pixel data: everything in a file of `file_size`
    /// bytes from `pixel_data_offset` onwards.
    pub fn getPixelDataLen(self: *const BitmapInfo, file_size: u64) u64 {
        return file_size - self.pixel_data_offset;
    }
};
|
||||
|
||||
/// Reads the bitmap file header and DIB header from `reader` and returns the
/// layout information needed to locate the color palette and pixel data.
///
/// `max_size` is the upper bound for the pixel data offset; an offset beyond
/// it yields `error.ImpossiblePixelDataOffset`.
///
/// All multi-byte header fields are interpreted as little-endian regardless
/// of the host's byte order.
pub fn read(reader: anytype, max_size: u64) ReadError!BitmapInfo {
    var bitmap_info: BitmapInfo = undefined;
    const file_header = reader.readBytesNoEof(file_header_len) catch return error.UnexpectedEOF;

    // Both sides of this comparison are loaded with native endianness, so
    // the check is a plain two-byte equality test on any host.
    const id = std.mem.readIntNative(u16, file_header[0..2]);
    if (id != windows_format_id) return error.InvalidFileHeader;

    // The offset is stored little-endian in the file; a native-endian read
    // would produce a byte-swapped value on big-endian hosts.
    bitmap_info.pixel_data_offset = std.mem.readIntLittle(u32, file_header[10..14]);
    if (bitmap_info.pixel_data_offset > max_size) return error.ImpossiblePixelDataOffset;

    bitmap_info.dib_header_size = reader.readIntLittle(u32) catch return error.UnexpectedEOF;
    // Widen before adding: the DIB header size comes straight from the file,
    // and a value close to maxInt(u32) would overflow a u32 addition.
    const headers_len = @as(u64, file_header_len) + bitmap_info.dib_header_size;
    if (bitmap_info.pixel_data_offset < headers_len) return error.ImpossiblePixelDataOffset;

    const dib_version = BitmapHeader.Version.get(bitmap_info.dib_header_size);
    switch (dib_version) {
        .@"nt3.1", .@"nt4.0", .@"nt5.0" => {
            var dib_header_buf: [@sizeOf(BITMAPINFOHEADER)]u8 align(@alignOf(BITMAPINFOHEADER)) = undefined;
            // The size field has already been consumed from the reader, so
            // write it back into the buffer and read only the remainder.
            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
            const dib_header: *BITMAPINFOHEADER = @ptrCast(&dib_header_buf);
            structFieldsLittleToNative(BITMAPINFOHEADER, dib_header);

            bitmap_info.colors_in_palette = try dib_header.numColorsInTable();
            bitmap_info.bytes_per_color_palette_element = 4;
            bitmap_info.compression = @enumFromInt(dib_header.biCompression);

            // The bit masks (if the compression type requires them) must fit
            // between the headers and the pixel data.
            if (bitmap_info.getByteLenBetweenHeadersAndPixels() < bitmap_info.getBitmasksByteLen()) {
                return error.MissingBitfieldMasks;
            }
        },
        .@"win2.0" => {
            var dib_header_buf: [@sizeOf(BITMAPCOREHEADER)]u8 align(@alignOf(BITMAPCOREHEADER)) = undefined;
            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
            const dib_header: *BITMAPCOREHEADER = @ptrCast(&dib_header_buf);
            structFieldsLittleToNative(BITMAPCOREHEADER, dib_header);

            // > The size of the color palette is calculated from the BitsPerPixel value.
            // > The color palette has 2, 16, 256, or 0 entries for a BitsPerPixel of
            // > 1, 4, 8, and 24, respectively.
            bitmap_info.colors_in_palette = switch (dib_header.bcBitCount) {
                inline 1, 4, 8 => |bit_count| 1 << bit_count,
                24 => 0,
                else => return error.InvalidBitsPerPixel,
            };
            bitmap_info.bytes_per_color_palette_element = 3;

            bitmap_info.compression = .BI_RGB;
        },
        .unknown => return error.UnknownBitmapVersion,
    }

    return bitmap_info;
}
|
||||
|
||||
/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader
|
||||
/// Header layout that `read` uses for the "win2.0" DIB header version.
/// `read` fills the first four bytes from the already-consumed header size
/// and converts every field from little-endian to native byte order.
pub const BITMAPCOREHEADER = extern struct {
|
||||
bcSize: u32,
|
||||
bcWidth: u16,
|
||||
bcHeight: u16,
|
||||
bcPlanes: u16,
|
||||
// `read` derives the palette size from this value (1, 4, 8, or 24).
bcBitCount: u16,
|
||||
};
|
||||
|
||||
/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
|
||||
pub const BITMAPINFOHEADER = extern struct {
    bcSize: u32,
    biWidth: i32,
    biHeight: i32,
    biPlanes: u16,
    biBitCount: u16,
    biCompression: u32,
    biSizeImage: u32,
    biXPelsPerMeter: i32,
    biYPelsPerMeter: i32,
    biClrUsed: u32,
    biClrImportant: u32,

    /// Computes the number of entries in the color table.
    ///
    /// Fails with error.TooManyColorsInPalette when `biClrUsed` is larger
    /// than the number of distinct colors the bit depth can express (e.g. a
    /// 1-bit-per-pixel bitmap cannot index more than 2 colors).
    ///
    /// Fails with error.InvalidBitsPerPixel when the bit depth is not one of
    /// 1, 4, 8, 16, 24, or 32.
    pub fn numColorsInTable(self: BITMAPINFOHEADER) !u32 {
        switch (self.biBitCount) {
            inline 1, 4, 8 => |bits| {
                const palette_capacity = 1 << bits;
                // > If biClrUsed is zero, the array contains the maximum number of
                // > colors for the given bitdepth; that is, 2^biBitCount colors
                if (self.biClrUsed == 0) return palette_capacity;
                // > If biClrUsed is nonzero and the biBitCount member is less than 16,
                // > the biClrUsed member specifies the actual number of colors the
                // > graphics engine or device driver accesses.
                if (self.biClrUsed > palette_capacity) return error.TooManyColorsInPalette;
                return self.biClrUsed;
            },
            // > If biBitCount is 16 or greater, the biClrUsed member specifies
            // > the size of the color table used to optimize performance of the
            // > system color palettes.
            //
            // At these depths the pixel data does not index the color table,
            // but a table with more entries than the pixel data can represent
            // still indicates a malformed bitmap, so the same limit applies.
            inline 16, 24, 32 => |bits| {
                const representable_colors = 1 << bits;
                if (self.biClrUsed > representable_colors) {
                    return error.TooManyColorsInPalette;
                }
                return self.biClrUsed;
            },
            else => return error.InvalidBitsPerPixel,
        }
    }
};
|
||||
|
||||
/// Compression type of a bitmap. `read` produces this directly from the
/// header's `biCompression` field via `@enumFromInt`.
pub const Compression = enum(u32) {
|
||||
BI_RGB = 0,
|
||||
BI_RLE8 = 1,
|
||||
BI_RLE4 = 2,
|
||||
// `BitmapInfo.getBitmasksByteLen` reports 12 bytes of bit masks for this value.
BI_BITFIELDS = 3,
|
||||
BI_JPEG = 4,
|
||||
BI_PNG = 5,
|
||||
// `BitmapInfo.getBitmasksByteLen` reports 16 bytes of bit masks for this value.
BI_ALPHABITFIELDS = 6,
|
||||
BI_CMYK = 11,
|
||||
BI_CMYKRLE8 = 12,
|
||||
BI_CMYKRLE4 = 13,
|
||||
// Non-exhaustive: any other u32 read from a file is still representable.
_,
|
||||
};
|
||||
|
||||
/// Converts every field of `x` in place from little-endian to the host's
/// native byte order. `T` must be a struct type.
fn structFieldsLittleToNative(comptime T: type, x: *T) void {
    inline for (std.meta.fields(T)) |struct_field| {
        const little_endian_value = @field(x, struct_field.name);
        @field(x, struct_field.name) = std.mem.littleToNative(struct_field.type, little_endian_value);
    }
}
|
||||
|
||||
test "read" {
    var bmp_data = "BM<\x00\x00\x00\x00\x00\x00\x006\x00\x00\x00(\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00\x12\x0b\x00\x00\x12\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x7f\x00\x00\x00\x00".*;
    var stream = std.io.fixedBufferStream(&bmp_data);

    // Unmodified data parses and reports the nt3.1 header size.
    {
        const info = try read(stream.reader(), bmp_data.len);
        try std.testing.expectEqual(@as(u32, BitmapHeader.Version.@"nt3.1".len()), info.dib_header_size);
    }

    // A DIB header size that matches no known version is rejected.
    {
        stream.reset();
        bmp_data[file_header_len] = 11;
        try std.testing.expectError(error.UnknownBitmapVersion, read(stream.reader(), bmp_data.len));

        // restore
        bmp_data[file_header_len] = BitmapHeader.Version.@"nt3.1".len();
    }

    // A wrong magic identifier is rejected.
    {
        stream.reset();
        bmp_data[0] = 'b';
        try std.testing.expectError(error.InvalidFileHeader, read(stream.reader(), bmp_data.len));

        // restore
        bmp_data[0] = 'B';
    }

    // Data that ends one byte short of the full DIB header.
    {
        const truncated_len = file_header_len + BitmapHeader.Version.@"nt3.1".len() - 1;
        var truncated_dib_stream = std.io.fixedBufferStream(bmp_data[0..truncated_len]);
        try std.testing.expectError(error.UnexpectedEOF, read(truncated_dib_stream.reader(), bmp_data.len));
    }

    // Data that ends one byte short of the full file header.
    {
        const truncated_len = file_header_len - 1;
        var truncated_file_header_stream = std.io.fixedBufferStream(bmp_data[0..truncated_len]);
        try std.testing.expectError(error.UnexpectedEOF, read(truncated_file_header_stream.reader(), bmp_data.len));
    }
}
|
||||
1433
src/resinator/cli.zig
Normal file
1433
src/resinator/cli.zig
Normal file
File diff suppressed because it is too large
Load Diff
487
src/resinator/code_pages.zig
Normal file
487
src/resinator/code_pages.zig
Normal file
@ -0,0 +1,487 @@
|
||||
const std = @import("std");
|
||||
const windows1252 = @import("windows1252.zig");
|
||||
|
||||
// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing
|
||||
// than it is to the stuff in this file.
|
||||
//
|
||||
// ‰ representations for context:
|
||||
// Win-1252 89
|
||||
// UTF-8 E2 80 B0
|
||||
// UTF-16 20 30
|
||||
//
|
||||
// With code page 65001:
|
||||
// ‰ RCDATA { "‰" L"‰" }
|
||||
// File encoded as Windows-1252:
|
||||
// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16
|
||||
// "‰" => 0x3F ('?')
|
||||
// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16
|
||||
// File encoded as UTF-8:
|
||||
// ‰ => <U+2030 ‰> as u16
|
||||
// "‰" => 0x89 ('‰' encoded as Windows-1252)
|
||||
// L"‰" => <U+2030 ‰> as u16
|
||||
//
|
||||
// With code page 1252:
|
||||
// ‰ RCDATA { "‰" L"‰" }
|
||||
// File encoded as Windows-1252:
|
||||
// ‰ => <U+2030 ‰> as u16
|
||||
// "‰" => 0x89 ('‰' encoded as Windows-1252)
|
||||
// L"‰" => <U+2030 ‰> as u16
|
||||
// File encoded as UTF-8:
|
||||
// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16
|
||||
// ^ first byte of utf8 representation
|
||||
// ^ second byte of UTF-8 representation (0x80), but interpreted as
|
||||
// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>)
|
||||
// ^ third byte of utf8 representation
|
||||
// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation)
|
||||
// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation)
|
||||
//
|
||||
// With code page 1252:
|
||||
// <0x90> RCDATA { "<0x90>" L"<0x90>" }
|
||||
// File encoded as Windows-1252:
|
||||
// <0x90> => 0x90 as u16
|
||||
// "<0x90>" => 0x90
|
||||
// L"<0x90>" => 0x90 as u16
|
||||
// File encoded as UTF-8:
|
||||
// <0x90> => 0xC2 as u16, 0x90 as u16
|
||||
// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>)
|
||||
// L"<0x90>" => 0xC2 as u16, 0x90 as u16
|
||||
//
|
||||
// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>):
|
||||
// "Âa" L"Âa" "\xC2ad" L"\xC2AD"
|
||||
// With code page 1252:
|
||||
// C2 61 C2 00 61 00 C2 61 64 AD C2
|
||||
// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD
|
||||
// \xC2~`
|
||||
// With code page 65001:
|
||||
// 3F 61 FD FF 61 00 C2 61 64 AD C2
|
||||
// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD
|
||||
// `. `. `~\xC2
|
||||
// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it).
|
||||
// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the
|
||||
// `. invalid sequence so only the <0xC2> gets converted to <U+FFFD>.
|
||||
// `~Same as ^ but converted to '?' instead.
|
||||
//
|
||||
// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>):
|
||||
// "ð€a" L"ð€a"
|
||||
// With code page 1252:
|
||||
// F0 80 61 F0 00 AC 20 61 00
|
||||
// ð^ €^ a^ ð~~~^ €~~~^ a~~~^
|
||||
// With code page 65001:
|
||||
// 3F 61 FD FF 61 00
|
||||
// ^. a^ ^~~~. a~~~^
|
||||
// `. `.
|
||||
// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so
|
||||
// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>'
|
||||
// `~Same as ^ but converted to '?' instead.
|
||||
|
||||
/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
|
||||
pub const CodePage = enum(u16) {
|
||||
// supported
|
||||
windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows)
|
||||
utf8 = 65001, // utf-8 Unicode (UTF-8)
|
||||
|
||||
// unsupported but valid
|
||||
ibm037 = 37, // IBM037 IBM EBCDIC US-Canada
|
||||
ibm437 = 437, // IBM437 OEM United States
|
||||
ibm500 = 500, // IBM500 IBM EBCDIC International
|
||||
asmo708 = 708, // ASMO-708 Arabic (ASMO 708)
|
||||
asmo449plus = 709, // Arabic (ASMO-449+, BCON V4)
|
||||
transparent_arabic = 710, // Arabic - Transparent Arabic
|
||||
dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
|
||||
ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS)
|
||||
ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS)
|
||||
ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS)
|
||||
ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS)
|
||||
ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian)
|
||||
ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS)
|
||||
ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol
|
||||
ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS)
|
||||
ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS)
|
||||
dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS)
|
||||
ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS)
|
||||
ibm864 = 864, // IBM864 OEM Arabic; Arabic (864)
|
||||
ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS)
|
||||
cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS)
|
||||
ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS)
|
||||
ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
|
||||
windows874 = 874, // windows-874 Thai (Windows)
|
||||
cp875 = 875, // cp875 IBM EBCDIC Greek Modern
|
||||
shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS)
|
||||
gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
|
||||
ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code)
|
||||
big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
|
||||
ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5)
|
||||
ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System
|
||||
ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
|
||||
ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
|
||||
ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
|
||||
ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
|
||||
ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
|
||||
ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
|
||||
ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
|
||||
ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
|
||||
ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
|
||||
ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
|
||||
utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
|
||||
utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications
|
||||
windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows)
|
||||
windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows)
|
||||
windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows)
|
||||
windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows)
|
||||
windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows)
|
||||
windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows)
|
||||
windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows)
|
||||
windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows)
|
||||
johab = 1361, // Johab Korean (Johab)
|
||||
macintosh = 10000, // macintosh MAC Roman; Western European (Mac)
|
||||
x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac)
|
||||
x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
|
||||
x_mac_korean = 10003, // x-mac-korean Korean (Mac)
|
||||
x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac)
|
||||
x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac)
|
||||
x_mac_greek = 10006, // x-mac-greek Greek (Mac)
|
||||
x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac)
|
||||
x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
|
||||
x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac)
|
||||
x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac)
|
||||
x_mac_thai = 10021, // x-mac-thai Thai (Mac)
|
||||
x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac)
|
||||
x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac)
|
||||
x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac)
|
||||
x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac)
|
||||
utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
|
||||
utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
|
||||
x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
|
||||
x_cp20001 = 20001, // x-cp20001 TCA Taiwan
|
||||
x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
|
||||
x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan
|
||||
x_cp20004 = 20004, // x-cp20004 TeleText Taiwan
|
||||
x_cp20005 = 20005, // x-cp20005 Wang Taiwan
|
||||
x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
|
||||
x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit)
|
||||
x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit)
|
||||
x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit)
|
||||
us_ascii = 20127, // us-ascii US-ASCII (7-bit)
|
||||
x_cp20261 = 20261, // x-cp20261 T.61
|
||||
x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent
|
||||
ibm273 = 20273, // IBM273 IBM EBCDIC Germany
|
||||
ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway
|
||||
ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden
|
||||
ibm280 = 20280, // IBM280 IBM EBCDIC Italy
|
||||
ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain
|
||||
ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom
|
||||
ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended
|
||||
ibm297 = 20297, // IBM297 IBM EBCDIC France
|
||||
ibm420 = 20420, // IBM420 IBM EBCDIC Arabic
|
||||
ibm423 = 20423, // IBM423 IBM EBCDIC Greek
|
||||
ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew
|
||||
x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
|
||||
ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai
|
||||
koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
|
||||
ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic
|
||||
ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian
|
||||
ibm905 = 20905, // IBM905 IBM EBCDIC Turkish
|
||||
ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
|
||||
euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
|
||||
x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
|
||||
x_cp20949 = 20949, // x-cp20949 Korean Wansung
|
||||
cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
|
||||
// = 21027, // (deprecated)
|
||||
koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
|
||||
iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
|
||||
iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
|
||||
iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3
|
||||
iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic
|
||||
iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic
|
||||
iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic
|
||||
iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek
|
||||
iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
|
||||
iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish
|
||||
iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian
|
||||
iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9
|
||||
x_europa = 29001, // x-Europa Europa 3
|
||||
is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
|
||||
iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
|
||||
cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
|
||||
iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
|
||||
iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean
|
||||
x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
|
||||
iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese
|
||||
ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended
|
||||
ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese
|
||||
ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean
|
||||
ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese
|
||||
ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese
|
||||
ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese
|
||||
ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese
|
||||
euc_jp = 51932, // euc-jp EUC Japanese
|
||||
euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
|
||||
euc_kr = 51949, // euc-kr EUC Korean
|
||||
euc_chinesetrad = 51950, // EUC Traditional Chinese
|
||||
hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
|
||||
gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
|
||||
x_iscii_de = 57002, // x-iscii-de ISCII Devanagari
|
||||
x_iscii_be = 57003, // x-iscii-be ISCII Bangla
|
||||
x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil
|
||||
x_iscii_te = 57005, // x-iscii-te ISCII Telugu
|
||||
x_iscii_as = 57006, // x-iscii-as ISCII Assamese
|
||||
x_iscii_or = 57007, // x-iscii-or ISCII Odia
|
||||
x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada
|
||||
x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam
|
||||
x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati
|
||||
x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi
|
||||
utf7 = 65000, // utf-7 Unicode (UTF-7)
|
||||
|
||||
/// Decodes the codepoint that starts at `index` within `bytes` according to
/// `code_page`. Returns null when `index` is at or past the end of `bytes`.
/// Must only be called on a supported code page (see `isSupported`).
pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint {
    if (index >= bytes.len) return null;
    return switch (code_page) {
        // Every byte value has a representation, so a direct conversion
        // of the single byte is all that is needed.
        .windows1252 => Codepoint{
            .value = windows1252.toCodepoint(bytes[index]),
            .byte_len = 1,
        },
        .utf8 => Utf8.WellFormedDecoder.decode(bytes[index..]),
        else => unreachable,
    };
}
|
||||
|
||||
/// Reports whether `code_page` is one of the code pages that can be decoded
/// (Windows-1252 and UTF-8).
pub fn isSupported(code_page: CodePage) bool {
    return code_page == .windows1252 or code_page == .utf8;
}
|
||||
|
||||
/// Looks up the code page whose numeric identifier equals `identifier`.
/// Returns error.InvalidCodePage when no enum field has that value.
pub fn getByIdentifier(identifier: u16) !CodePage {
    // A linear scan over the enum fields; a lookup table could be faster, but
    // this function is not expected to be called often.
    inline for (std.meta.fields(CodePage)) |candidate| {
        if (candidate.value == identifier) return @field(CodePage, candidate.name);
    }
    return error.InvalidCodePage;
}
|
||||
|
||||
/// Like `getByIdentifier`, but additionally returns error.UnsupportedCodePage
/// when the identifier names a code page for which `isSupported` is false.
pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage {
    const code_page = try getByIdentifier(identifier);
    if (!isSupported(code_page)) return error.UnsupportedCodePage;
    return code_page;
}
|
||||
};
|
||||
|
||||
pub const Utf8 = struct {
    /// Decoder that rejects ill-formed UTF-8 sequences as described by section
    /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
    pub const WellFormedDecoder = struct {
        /// Like std.unicode.utf8ByteSequenceLength, but:
        /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
        /// - Returns an optional value instead of an error union
        pub fn sequenceLength(first_byte: u8) ?u3 {
            if (first_byte <= 0x7F) return 1;
            if (first_byte >= 0xC2 and first_byte <= 0xDF) return 2;
            if (first_byte >= 0xE0 and first_byte <= 0xEF) return 3;
            if (first_byte >= 0xF0 and first_byte <= 0xF4) return 4;
            return null;
        }

        fn isContinuationByte(byte: u8) bool {
            return byte >= 0x80 and byte <= 0xBF;
        }

        /// Checks the second byte of a multi-byte sequence against the range
        /// permitted for the given first byte.
        /// See Table 3-7 of D92 in Chapter 3 of the Unicode Standard
        fn isWellFormedSecondByte(first_byte: u8, byte: u8) bool {
            return switch (first_byte) {
                0xE0 => byte >= 0xA0 and byte <= 0xBF,
                0xED => byte >= 0x80 and byte <= 0x9F,
                0xF0 => byte >= 0x90 and byte <= 0xBF,
                0xF4 => byte >= 0x80 and byte <= 0x8F,
                else => isContinuationByte(byte),
            };
        }

        /// Decodes the codepoint at the start of `bytes`, which must not be
        /// empty. Ill-formed or truncated input produces `Codepoint.invalid`
        /// with `byte_len` set to the length of the invalid sequence.
        pub fn decode(bytes: []const u8) Codepoint {
            std.debug.assert(bytes.len > 0);
            const lead_byte = bytes[0];
            const expected_len = sequenceLength(lead_byte) orelse {
                return .{ .value = Codepoint.invalid, .byte_len = 1 };
            };
            if (expected_len == 1) return .{ .value = lead_byte, .byte_len = 1 };

            var value: u21 = lead_byte & 0b00011111;
            var consumed: u8 = 1;
            const available_len = @min(bytes.len, expected_len);
            while (consumed < available_len) : (consumed += 1) {
                const byte = bytes[consumed];
                const well_formed = if (consumed == 1)
                    isWellFormedSecondByte(lead_byte, byte)
                else
                    isContinuationByte(byte);

                if (!well_formed) {
                    // The offending byte only becomes part of the invalid
                    // sequence when it lies in the continuation byte range;
                    // any other value is left for the next decode call.
                    //
                    // Note: This mirrors what the Windows RC compiler does and
                    // may not be what the Unicode standard prescribes.
                    const invalid_len = if (isContinuationByte(byte)) consumed + 1 else consumed;
                    return .{ .value = Codepoint.invalid, .byte_len = invalid_len };
                }

                value = (value << 6) | (byte & 0b00111111);
            }
            // The input ended before the whole sequence was available.
            if (consumed != expected_len) {
                return .{ .value = Codepoint.invalid, .byte_len = consumed };
            }
            return .{ .value = value, .byte_len = expected_len };
        }
    };
};
|
||||
|
||||
test "Utf8.WellFormedDecoder" {
    // <0x80> is not an allowed second byte after <0xF0>, but it is in the
    // continuation byte range, so both bytes form the invalid sequence.
    const result = Utf8.WellFormedDecoder.decode("\xF0\x80");
    try std.testing.expectEqual(Codepoint.invalid, result.value);
    try std.testing.expectEqual(@as(usize, 2), result.byte_len);
}
|
||||
|
||||
test "codepointAt invalid utf8" {
    // Each case lists the indexes at which an invalid codepoint is expected
    // together with the byte length of that invalid sequence.
    const InvalidAt = struct { index: usize, byte_len: usize };
    const Case = struct { bytes: []const u8, invalid_at: []const InvalidAt };
    const cases = [_]Case{
        .{
            .bytes = "\xf0\xf0\x80\x80\x80",
            .invalid_at = &.{
                .{ .index = 0, .byte_len = 1 },
                .{ .index = 1, .byte_len = 2 },
                .{ .index = 3, .byte_len = 1 },
                .{ .index = 4, .byte_len = 1 },
            },
        },
        .{
            .bytes = "\xE1\xA0\xC0",
            .invalid_at = &.{
                .{ .index = 0, .byte_len = 2 },
                .{ .index = 2, .byte_len = 1 },
            },
        },
        .{
            .bytes = "\xD2",
            .invalid_at = &.{
                .{ .index = 0, .byte_len = 1 },
            },
        },
        .{
            .bytes = "\xE1\xA0",
            .invalid_at = &.{
                .{ .index = 0, .byte_len = 2 },
            },
        },
        .{
            .bytes = "\xC5\xFF",
            .invalid_at = &.{
                .{ .index = 0, .byte_len = 1 },
                .{ .index = 1, .byte_len = 1 },
            },
        },
    };

    for (cases) |case| {
        for (case.invalid_at) |expected| {
            try std.testing.expectEqual(Codepoint{
                .value = Codepoint.invalid,
                .byte_len = expected.byte_len,
            }, CodePage.utf8.codepointAt(expected.index, case.bytes).?);
        }
        // One past the last byte there is nothing left to decode.
        try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(case.bytes.len, case.bytes));
    }
}
|
||||
|
||||
test "codepointAt utf8 encoded" {
    const expectEqual = std.testing.expectEqual;
    const no_codepoint: ?Codepoint = null;
    const utf8_encoded = "²";

    // with code page utf8: the two bytes decode to a single codepoint
    const as_utf8 = Codepoint{ .value = '²', .byte_len = 2 };
    try expectEqual(as_utf8, CodePage.utf8.codepointAt(0, utf8_encoded).?);
    try expectEqual(no_codepoint, CodePage.utf8.codepointAt(2, utf8_encoded));

    // with code page windows1252: each byte is its own codepoint
    const first_byte = Codepoint{ .value = '\xC2', .byte_len = 1 };
    const second_byte = Codepoint{ .value = '\xB2', .byte_len = 1 };
    try expectEqual(first_byte, CodePage.windows1252.codepointAt(0, utf8_encoded).?);
    try expectEqual(second_byte, CodePage.windows1252.codepointAt(1, utf8_encoded).?);
    try expectEqual(no_codepoint, CodePage.windows1252.codepointAt(2, utf8_encoded));
}
|
||||
|
||||
test "codepointAt windows1252 encoded" {
    const windows1252_encoded = "\xB2";

    // with code page utf8: a lone <0xB2> is not a valid first byte
    try std.testing.expectEqual(Codepoint{
        .value = Codepoint.invalid,
        .byte_len = 1,
    }, CodePage.utf8.codepointAt(0, windows1252_encoded).?);
    // The invalid sequence is one byte long, so the next index to probe is 1
    // (the first out-of-range index), matching the windows1252 check below.
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(1, windows1252_encoded));

    // with code page windows1252
    try std.testing.expectEqual(Codepoint{
        .value = '\xB2',
        .byte_len = 1,
    }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?);
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded));
}
|
||||
|
||||
/// A decoded codepoint together with the number of source bytes it occupied.
pub const Codepoint = struct {
|
||||
/// The codepoint value, or `invalid` when the bytes could not be decoded.
value: u21,
|
||||
/// Number of bytes consumed from the input for this codepoint (or for the
/// invalid sequence).
byte_len: usize,
|
||||
|
||||
/// Marker value (the maximum u21) that `Utf8.WellFormedDecoder.decode`
/// stores in `value` for ill-formed or truncated sequences.
pub const invalid: u21 = std.math.maxInt(u21);
|
||||
};
|
||||
340
src/resinator/comments.zig
Normal file
340
src/resinator/comments.zig
Normal file
@ -0,0 +1,340 @@
|
||||
//! Expects to run after a C preprocessor step that preserves comments.
|
||||
//!
|
||||
//! `rc` has a peculiar quirk where something like `blah/**/blah` will be
|
||||
//! transformed into `blahblah` during parsing. However, `clang -E` will
|
||||
//! transform it into `blah blah`, so in order to match `rc`, we need
|
||||
//! to remove comments ourselves after the preprocessor runs.
|
||||
//! Note: Multiline comments that actually span more than one line do
|
||||
//! get translated to a space character by `rc`.
|
||||
//!
|
||||
//! Removing comments before lexing also allows the lexer to not have to
|
||||
//! deal with comments which would complicate its implementation (this is something
|
||||
//! of a tradeoff, as removing comments in a separate pass means that we'll
|
||||
//! need to iterate the source twice instead of once, but having to deal with
|
||||
//! comments when lexing would be a pain).
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
|
||||
const SourceMappings = @import("source_mapping.zig").SourceMappings;
|
||||
const LineHandler = @import("lex.zig").LineHandler;
|
||||
const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;
|
||||
|
||||
/// Removes `//` line comments and `/* */` multiline comments from `source`, writing
/// everything else to `buf` and returning the written portion of `buf`.
///
/// - Comment markers inside single- or double-quoted strings are left untouched.
/// - Line endings that terminate a line comment or occur inside a multiline comment are
///   kept in the output, with the exception of a `\r` inside a multiline comment that is
///   not part of a line-ending pair (see `handleMultilineCarriageReturn`); in that case
///   `source_mappings` (if provided) is collapsed instead.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    const State = enum {
        start,
        forward_slash,
        line_comment,
        multiline_comment,
        multiline_comment_end,
        single_quoted,
        single_quoted_escape,
        double_quoted,
        double_quoted_escape,
    };
    var state: State = .start;
    var index: usize = 0;
    // Index of a `/` that has been seen but not yet written, since it may turn out
    // to be the start of a comment.
    var pending_start: ?usize = null;
    var line_handler = LineHandler{ .buffer = source };
    while (index < source.len) : (index += 1) {
        const c = source[index];
        // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely
        //       cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed
        //       in the lexer, but comments are stripped before getting to the lexer.
        switch (state) {
            .start => switch (c) {
                '/' => {
                    state = .forward_slash;
                    pending_start = index;
                },
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                },
                else => {
                    switch (c) {
                        '"' => state = .double_quoted,
                        '\'' => state = .single_quoted,
                        else => {},
                    }
                    result.write(c);
                },
            },
            .forward_slash => switch (c) {
                '/' => state = .line_comment,
                '*' => {
                    state = .multiline_comment;
                },
                else => {
                    // The `/` did not start a comment, so write it along with the current byte.
                    _ = line_handler.maybeIncrementLineNumber(index);
                    result.writeSlice(source[pending_start.? .. index + 1]);
                    pending_start = null;
                    // The current byte still needs the same treatment it would get in
                    // `.start`: a quote directly after a lone `/` opens a string, and
                    // comment markers within that string must not be stripped.
                    state = switch (c) {
                        '"' => .double_quoted,
                        '\'' => .single_quoted,
                        else => .start,
                    };
                },
            },
            .line_comment => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                    state = .start;
                },
                else => {},
            },
            .multiline_comment => switch (c) {
                '\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
                '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                },
                '*' => state = .multiline_comment_end,
                else => {},
            },
            .multiline_comment_end => switch (c) {
                '\r' => {
                    handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
                    // We only want to treat this as a newline if it's part of a CRLF pair. If it's
                    // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
                    // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
                    if (formsLineEndingPair(source, '\r', index + 1)) {
                        state = .multiline_comment;
                    }
                },
                '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    result.write(c);
                    state = .multiline_comment;
                },
                '/' => {
                    state = .start;
                },
                else => {
                    state = .multiline_comment;
                },
            },
            .single_quoted => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                '\\' => {
                    state = .single_quoted_escape;
                    result.write(c);
                },
                '\'' => {
                    state = .start;
                    result.write(c);
                },
                else => {
                    result.write(c);
                },
            },
            .single_quoted_escape => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                else => {
                    state = .single_quoted;
                    result.write(c);
                },
            },
            .double_quoted => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                '\\' => {
                    state = .double_quoted_escape;
                    result.write(c);
                },
                '"' => {
                    state = .start;
                    result.write(c);
                },
                else => {
                    result.write(c);
                },
            },
            .double_quoted_escape => switch (c) {
                '\r', '\n' => {
                    _ = line_handler.incrementLineNumber(index);
                    state = .start;
                    result.write(c);
                },
                else => {
                    state = .double_quoted;
                    result.write(c);
                },
            },
        }
    }
    // A `/` that is the very last byte of `source` can't have started a comment, but it
    // is still pending at this point, so write it out instead of silently dropping it.
    if (state == .forward_slash) {
        result.write('/');
    }
    return result.getWritten();
}
|
||||
|
||||
/// Handles a `\r` found at `source[index]` while inside a multiline comment.
///
/// The line number is always incremented, but the `\r` is only written to `result`
/// when `formsLineEndingPair` reports that it pairs with the following byte.
/// Otherwise the increment is undone and the corresponding line in `source_mappings`
/// (if provided) is collapsed.
inline fn handleMultilineCarriageReturn(
    source: []const u8,
    line_handler: *LineHandler,
    index: usize,
    result: *UncheckedSliceWriter,
    source_mappings: ?*SourceMappings,
) void {
    // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the
    // purposes of removing comments, but *should* be treated as a line ending for the
    // purposes of line counting/source mapping
    _ = line_handler.incrementLineNumber(index);

    // So only write the \r if it's part of a CRLF pair
    const is_part_of_pair = formsLineEndingPair(source, '\r', index + 1);
    if (is_part_of_pair) {
        result.write('\r');
        return;
    }

    // And otherwise, we want to collapse the source mapping so that we can still know which
    // line came from where.
    //
    // Because the line gets collapsed, we need to decrement line number so that
    // the next collapse acts on the first of the collapsed line numbers
    line_handler.line_number -= 1;
    const mappings = source_mappings orelse return;
    mappings.collapse(line_handler.line_number, 1);
}
|
||||
|
||||
/// Allocating variant of `removeComments`: strips comments from `source` into a newly
/// allocated buffer that is resized to the length of the result.
/// The returned slice is owned by the caller and must be freed with `allocator`.
pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
    // The output can never be longer than the input, so `source.len` is always enough.
    const scratch = try allocator.alloc(u8, source.len);
    errdefer allocator.free(scratch);

    const stripped_len = removeComments(source, scratch, source_mappings).len;
    return allocator.realloc(scratch, stripped_len);
}
|
||||
|
||||
/// Test helper: asserts that removing comments from `source` (without any source
/// mappings) yields exactly `expected`.
fn testRemoveComments(expected: []const u8, source: []const u8) !void {
    const allocator = std.testing.allocator;
    const actual = try removeCommentsAlloc(allocator, source, null);
    defer allocator.free(actual);

    try std.testing.expectEqualStrings(expected, actual);
}
|
||||
|
||||
test "basic" {
|
||||
try testRemoveComments("", "// comment");
|
||||
try testRemoveComments("", "/* comment */");
|
||||
}
|
||||
|
||||
test "mixed" {
|
||||
try testRemoveComments("hello", "hello// comment");
|
||||
try testRemoveComments("hello", "hel/* comment */lo");
|
||||
}
|
||||
|
||||
test "within a string" {
|
||||
// escaped " is \"
|
||||
try testRemoveComments(
|
||||
\\blah"//som\"/*ething*/"BLAH
|
||||
,
|
||||
\\blah"//som\"/*ething*/"BLAH
|
||||
);
|
||||
}
|
||||
|
||||
test "line comments retain newlines" {
|
||||
try testRemoveComments(
|
||||
\\
|
||||
\\
|
||||
\\
|
||||
,
|
||||
\\// comment
|
||||
\\// comment
|
||||
\\// comment
|
||||
);
|
||||
|
||||
try testRemoveComments("\r\n", "//comment\r\n");
|
||||
}
|
||||
|
||||
test "crazy" {
|
||||
try testRemoveComments(
|
||||
\\blah"/*som*/\""BLAH
|
||||
,
|
||||
\\blah"/*som*/\""/*ething*/BLAH
|
||||
);
|
||||
|
||||
try testRemoveComments(
|
||||
\\blah"/*som*/"BLAH RCDATA "BEGIN END
|
||||
\\
|
||||
\\
|
||||
\\hello
|
||||
\\"
|
||||
,
|
||||
\\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END
|
||||
\\// comment
|
||||
\\//"blah blah" RCDATA {}
|
||||
\\hello
|
||||
\\"
|
||||
);
|
||||
}
|
||||
|
||||
test "multiline comment with newlines" {
|
||||
// bare \r is not treated as a newline
|
||||
try testRemoveComments("blahblah", "blah/*some\rthing*/blah");
|
||||
|
||||
try testRemoveComments(
|
||||
\\blah
|
||||
\\blah
|
||||
,
|
||||
\\blah/*some
|
||||
\\thing*/blah
|
||||
);
|
||||
try testRemoveComments(
|
||||
"blah\r\nblah",
|
||||
"blah/*some\r\nthing*/blah",
|
||||
);
|
||||
|
||||
// handle *<not /> correctly
|
||||
try testRemoveComments(
|
||||
\\blah
|
||||
\\
|
||||
\\
|
||||
,
|
||||
\\blah/*some
|
||||
\\thing*
|
||||
\\/bl*ah*/
|
||||
);
|
||||
}
|
||||
|
||||
test "comments appended to a line" {
|
||||
try testRemoveComments(
|
||||
\\blah
|
||||
\\blah
|
||||
,
|
||||
\\blah // line comment
|
||||
\\blah
|
||||
);
|
||||
try testRemoveComments(
|
||||
"blah \r\nblah",
|
||||
"blah // line comment\r\nblah",
|
||||
);
|
||||
}
|
||||
|
||||
test "remove comments with mappings" {
|
||||
const allocator = std.testing.allocator;
|
||||
var mut_source = "blah/*\rcommented line*\r/blah".*;
|
||||
var mappings = SourceMappings{};
|
||||
_ = try mappings.files.put(allocator, "test.rc");
|
||||
try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 });
|
||||
try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 });
|
||||
try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 });
|
||||
defer mappings.deinit(allocator);
|
||||
|
||||
var result = removeComments(&mut_source, &mut_source, &mappings);
|
||||
|
||||
try std.testing.expectEqualStrings("blahblah", result);
|
||||
try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len);
|
||||
try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line);
|
||||
}
|
||||
|
||||
test "in place" {
|
||||
var mut_source = "blah /* comment */ blah".*;
|
||||
var result = removeComments(&mut_source, &mut_source, null);
|
||||
try std.testing.expectEqualStrings("blah blah", result);
|
||||
}
|
||||
3356
src/resinator/compile.zig
Normal file
3356
src/resinator/compile.zig
Normal file
File diff suppressed because it is too large
Load Diff
1033
src/resinator/errors.zig
Normal file
1033
src/resinator/errors.zig
Normal file
File diff suppressed because it is too large
Load Diff
310
src/resinator/ico.zig
Normal file
310
src/resinator/ico.zig
Normal file
@ -0,0 +1,310 @@
|
||||
//! https://devblogs.microsoft.com/oldnewthing/20120720-00/?p=7083
|
||||
//! https://learn.microsoft.com/en-us/previous-versions/ms997538(v=msdn.10)
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/menurc/newheader
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/menurc/resdir
|
||||
//! https://learn.microsoft.com/en-us/windows/win32/menurc/localheader
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
pub const ReadError = std.mem.Allocator.Error || error{ InvalidHeader, InvalidImageType, ImpossibleDataSize, UnexpectedEOF, ReadError };
|
||||
|
||||
/// Reads the header and directory entries of an icon/cursor file from `reader`,
/// translating any failure into a member of `ReadError`.
///
/// `max_size` is the upper bound used to validate each entry's data offset + size.
/// On success, the returned `IconDir` must be released with `IconDir.deinit`.
pub fn read(allocator: std.mem.Allocator, reader: anytype, max_size: u64) ReadError!IconDir {
    // Some Reader implementations have an empty ReadError error set which would
    // cause 'unreachable else' if we tried to use an else in the switch, so we
    // need to detect this case and not try to translate to ReadError
    const reader_error_set = @typeInfo(@TypeOf(reader).Error).ErrorSet;
    const reader_cannot_fail = reader_error_set == null or reader_error_set.?.len == 0;

    const parse_result = readAnyError(allocator, reader, max_size);
    if (reader_cannot_fail) {
        return parse_result catch |err| switch (err) {
            error.EndOfStream => error.UnexpectedEOF,
            else => |e| return e,
        };
    } else {
        return parse_result catch |err| switch (err) {
            error.EndOfStream => error.UnexpectedEOF,
            error.OutOfMemory,
            error.InvalidHeader,
            error.InvalidImageType,
            error.ImpossibleDataSize,
            => |e| return e,
            // The remaining errors are dependent on the `reader`, so
            // we just translate them all to generic ReadError
            else => error.ReadError,
        };
    }
}
|
||||
|
||||
// TODO: This seems like a somewhat strange pattern, could be a better way
//       to do this. Maybe it makes more sense to handle the translation
//       at the call site instead of having a helper function here.
/// Same as `read`, but errors from `reader` are returned as-is instead of being
/// translated (so e.g. hitting the end of the data surfaces as `error.EndOfStream`).
pub fn readAnyError(allocator: std.mem.Allocator, reader: anytype, max_size: u64) !IconDir {
    const header_reserved = try reader.readIntLittle(u16);
    if (header_reserved != 0) return error.InvalidHeader;

    const image_type = reader.readEnum(ImageType, .Little) catch |err| switch (err) {
        error.InvalidValue => return error.InvalidImageType,
        else => |e| return e,
    };

    const num_images = try reader.readIntLittle(u16);

    // To avoid over-allocation in the case of a file that says it has way more
    // entries than it actually does, we use an ArrayList with a conservatively
    // limited initial capacity instead of allocating the entire slice at once.
    const initial_capacity = @min(num_images, 8);
    var entries = try std.ArrayList(Entry).initCapacity(allocator, initial_capacity);
    errdefer entries.deinit();

    var remaining = num_images;
    while (remaining > 0) : (remaining -= 1) {
        // Read all the fields of the directory entry in the order they are stored
        const width = try reader.readByte();
        const height = try reader.readByte();
        const num_colors = try reader.readByte();
        const entry_reserved = try reader.readByte();
        // These two fields are color planes/bits per pixel for icons and
        // hotspot x/y for cursors
        const first_word = try reader.readIntLittle(u16);
        const second_word = try reader.readIntLittle(u16);
        const data_size = try reader.readIntLittle(u32);
        const data_offset = try reader.readIntLittle(u32);

        // Validate that the offset/data size is feasible
        if (@as(u64, data_offset) + data_size > max_size) {
            return error.ImpossibleDataSize;
        }
        // and that the data size is large enough for at least the header of an image
        // Note: This avoids needing to deal with a miscompilation from the Win32 RC
        //       compiler when the data size of an image is specified as zero but there
        //       is data to-be-read at the offset. The Win32 RC compiler will output
        //       an ICON/CURSOR resource with a bogus size in its header but with no actual
        //       data bytes in it, leading to an invalid .res. Similarly, if, for example,
        //       there is valid PNG data at the image's offset, but the size is specified
        //       as fewer bytes than the PNG header, then the Win32 RC compiler will still
        //       treat it as a PNG (e.g. unconditionally set num_planes to 1) but the data
        //       of the resource will only be 1 byte so treating it as a PNG doesn't make
        //       sense (especially not when you have to read past the data size to determine
        //       that it's a PNG).
        if (data_size < 16) {
            return error.ImpossibleDataSize;
        }

        const entry = Entry{
            .width = width,
            .height = height,
            .num_colors = num_colors,
            .reserved = entry_reserved,
            .type_specific_data = switch (image_type) {
                .icon => .{ .icon = .{
                    .color_planes = first_word,
                    .bits_per_pixel = second_word,
                } },
                .cursor => .{ .cursor = .{
                    .hotspot_x = first_word,
                    .hotspot_y = second_word,
                } },
            },
            .data_size_in_bytes = data_size,
            .data_offset_from_start_of_file = data_offset,
        };
        try entries.append(entry);
    }

    return .{
        .image_type = image_type,
        .entries = try entries.toOwnedSlice(),
        .allocator = allocator,
    };
}
|
||||
|
||||
/// The image type stored as a little-endian u16 in the file header; any other value
/// is rejected with `error.InvalidImageType` when reading.
pub const ImageType = enum(u16) {
    icon = 1,
    cursor = 2,
};
|
||||
|
||||
/// The parsed header and directory entries of an icon/cursor file.
/// `entries` is owned by the `IconDir` and is freed by `deinit`.
pub const IconDir = struct {
    image_type: ImageType,
    /// Note: entries.len will always fit into a u16, since the field containing the
    /// number of images in an ico file is a u16.
    entries: []Entry,
    allocator: std.mem.Allocator,

    /// Frees `entries` using the allocator they were allocated with.
    pub fn deinit(self: IconDir) void {
        self.allocator.free(self.entries);
    }

    /// Number of bytes written by `writeResData` before the first entry.
    pub const res_header_byte_len = 6;

    /// Returns the total number of bytes that `writeResData` will write.
    pub fn getResDataSize(self: IconDir) u32 {
        // maxInt(u16) * Entry.res_byte_len = 917,490 which is well within the u32 range.
        // Note: self.entries.len is limited to maxInt(u16)
        return @intCast(IconDir.res_header_byte_len + self.entries.len * Entry.res_byte_len);
    }

    /// Writes the header followed by every entry to `writer`. The entries are assigned
    /// consecutive IDs starting at `first_image_id`.
    ///
    /// It is up to the caller to make sure that `first_image_id + entries.len - 1`
    /// fits into a u16.
    pub fn writeResData(self: IconDir, writer: anytype, first_image_id: u16) !void {
        try writer.writeIntLittle(u16, 0);
        try writer.writeIntLittle(u16, @intFromEnum(self.image_type));
        // We know that entries.len must fit into a u16
        try writer.writeIntLittle(u16, @as(u16, @intCast(self.entries.len)));

        for (self.entries, 0..) |entry, i| {
            // The ID is computed per-entry (instead of incrementing a counter after each
            // write) so that no ID past the last entry is ever computed; otherwise a
            // final ID of maxInt(u16) would overflow even though every ID is valid.
            const image_id = first_image_id + @as(u16, @intCast(i));
            try entry.writeResData(writer, image_id);
        }
    }
};
|
||||
|
||||
/// A single directory entry of an icon/cursor file.
pub const Entry = struct {
    // Icons are limited to u8 sizes, cursors can have u16,
    // so we store as u16 and truncate when needed.
    width: u16,
    height: u16,
    num_colors: u8,
    /// This should always be zero, but whatever value it is gets
    /// carried over so we need to store it
    reserved: u8,
    type_specific_data: union(ImageType) {
        icon: struct {
            color_planes: u16,
            bits_per_pixel: u16,
        },
        cursor: struct {
            hotspot_x: u16,
            hotspot_y: u16,
        },
    },
    data_size_in_bytes: u32,
    data_offset_from_start_of_file: u32,

    /// Number of bytes written by `writeResData` (for both icons and cursors).
    pub const res_byte_len = 14;

    /// Writes this entry to `writer`, ending with the resource `id` in place of the
    /// file offset. All integers are written as little-endian.
    pub fn writeResData(self: Entry, writer: anytype, id: u16) !void {
        switch (self.type_specific_data) {
            .cursor => |cursor| {
                // Cursors have no color count/reserved bytes, so the dimensions
                // get written at their full u16 width
                try writer.writeIntLittle(u16, self.width);
                try writer.writeIntLittle(u16, self.height);
                try writer.writeIntLittle(u16, cursor.hotspot_x);
                try writer.writeIntLittle(u16, cursor.hotspot_y);
                // NOTE(review): presumably the extra 4 bytes account for the hotspot
                // being stored along with the cursor's image data; confirm against the
                // code that writes the image data.
                try writer.writeIntLittle(u32, self.data_size_in_bytes + 4);
            },
            .icon => |icon| {
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.width)));
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.height)));
                try writer.writeIntLittle(u8, self.num_colors);
                try writer.writeIntLittle(u8, self.reserved);
                try writer.writeIntLittle(u16, icon.color_planes);
                try writer.writeIntLittle(u16, icon.bits_per_pixel);
                try writer.writeIntLittle(u32, self.data_size_in_bytes);
            },
        }
        try writer.writeIntLittle(u16, id);
    }
};
|
||||
|
||||
test "icon" {
|
||||
const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
const icon = try read(std.testing.allocator, fbs.reader(), data.len);
|
||||
defer icon.deinit();
|
||||
|
||||
try std.testing.expectEqual(ImageType.icon, icon.image_type);
|
||||
try std.testing.expectEqual(@as(usize, 1), icon.entries.len);
|
||||
}
|
||||
|
||||
test "icon too many images" {
|
||||
// Note that with verifying that all data sizes are within the file bounds and >= 16,
|
||||
// it's not possible to hit EOF when looking for more RESDIR structures, since they are
|
||||
// themselves 16 bytes long, so we'll always hit ImpossibleDataSize instead.
|
||||
const data = "\x00\x00\x01\x00\x02\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
|
||||
}
|
||||
|
||||
test "icon data size past EOF" {
|
||||
const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x01\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
|
||||
}
|
||||
|
||||
test "icon data offset past EOF" {
|
||||
const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x17\x00\x00\x00" ++ [_]u8{0} ** 16;
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
|
||||
}
|
||||
|
||||
test "icon data size too small" {
|
||||
const data = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x0F\x00\x00\x00\x16\x00\x00\x00";
|
||||
var fbs = std.io.fixedBufferStream(data);
|
||||
try std.testing.expectError(error.ImpossibleDataSize, read(std.testing.allocator, fbs.reader(), data.len));
|
||||
}
|
||||
|
||||
/// The format of an image's data, as determined from its first 16 bytes.
pub const ImageFormat = enum {
    dib,
    png,
    riff,

    const riff_header = "RIFF";
    const png_signature = "\x89PNG\r\n\x1a\n";
    const ihdr_code = "IHDR";
    const acon_form_type = "ACON";

    /// Determines the format from the leading signature bytes. Anything that does
    /// not start with the RIFF or PNG signature is reported as `.dib`.
    pub fn detect(header_bytes: *const [16]u8) ImageFormat {
        if (std.mem.eql(u8, header_bytes[0..4], riff_header)) return .riff;
        if (std.mem.eql(u8, header_bytes[0..8], png_signature)) return .png;
        return .dib;
    }

    /// Checks the part of the header that follows the signature:
    /// - `.png` must have `IHDR` at bytes 12..16
    /// - `.riff` must have `ACON` at bytes 8..12
    /// - `.dib` is always considered valid
    pub fn validate(format: ImageFormat, header_bytes: *const [16]u8) bool {
        return switch (format) {
            .dib => true,
            .png => std.mem.eql(u8, header_bytes[12..16], ihdr_code),
            .riff => std.mem.eql(u8, header_bytes[8..12], acon_form_type),
        };
    }
};
|
||||
|
||||
/// Contains only the fields of BITMAPINFOHEADER (WinGDI.h) that are both:
/// - relevant to what we need, and
/// - are shared between all versions of BITMAPINFOHEADER (V4, V5).
pub const BitmapHeader = extern struct {
    bcSize: u32,
    bcWidth: i32,
    bcHeight: i32,
    bcPlanes: u16,
    bcBitCount: u16,

    /// Returns the header version implied by the `bcSize` field.
    pub fn version(self: *const BitmapHeader) Version {
        return Version.get(self.bcSize);
    }

    /// https://en.wikipedia.org/wiki/BMP_file_format#DIB_header_(bitmap_information_header)
    pub const Version = enum {
        unknown,
        @"win2.0", // Windows 2.0 or later
        @"nt3.1", // Windows NT, 3.1x or later
        @"nt4.0", // Windows NT 4.0, 95 or later
        @"nt5.0", // Windows NT 5.0, 98 or later

        /// Maps a header size in bytes to the version with exactly that size,
        /// or `.unknown` if no version matches.
        pub fn get(header_size: u32) Version {
            const known_versions = [_]Version{ .@"win2.0", .@"nt3.1", .@"nt4.0", .@"nt5.0" };
            inline for (known_versions) |known| {
                if (header_size == len(known)) return known;
            }
            return .unknown;
        }

        /// The header size in bytes of the given version.
        /// Must not be called with `.unknown`.
        pub fn len(comptime v: Version) comptime_int {
            return switch (v) {
                .unknown => unreachable,
                .@"win2.0" => 12,
                .@"nt3.1" => 40,
                .@"nt4.0" => 108,
                .@"nt5.0" => 124,
            };
        }

        /// Human-readable description of the version for use in error messages.
        pub fn nameForErrorDisplay(v: Version) []const u8 {
            return switch (v) {
                .@"win2.0" => "Windows 2.0 (BITMAPCOREHEADER)",
                .@"nt3.1" => "Windows NT, 3.1x (BITMAPINFOHEADER)",
                .@"nt4.0" => "Windows NT 4.0, 95 (BITMAPV4HEADER)",
                .@"nt5.0" => "Windows NT 5.0, 98 (BITMAPV5HEADER)",
                .unknown => "unknown",
            };
        }
    };
};
|
||||
877
src/resinator/lang.zig
Normal file
877
src/resinator/lang.zig
Normal file
@ -0,0 +1,877 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// This function is specific to how the Win32 RC command line interprets
/// language IDs specified as integers.
/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
/// - Wraps on overflow of u16
/// - Stops parsing on any invalid hexadecimal digits
/// - Errors if a digit is not the first char
/// - `-` (negative) prefix is allowed
///
/// Returns `error.InvalidLanguageId` if `str` is empty or if the first character
/// after the optional `-` and `0x` prefixes is not a hexadecimal digit.
pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
    // There's no first char to be a digit, and indexing `str[0]` below would be
    // out-of-bounds, so an empty string is rejected up front.
    if (str.len == 0) return error.InvalidLanguageId;

    const radix: u8 = 16;
    var buf = str;

    const is_negative = buf[0] == '-';
    if (is_negative) buf = buf[1..];

    // The prefix is only skipped when something follows it; a lone `0x` is
    // parsed as the digit `0` followed by an invalid digit.
    if (buf.len > 2 and buf[0] == '0' and buf[1] == 'x') {
        buf = buf[2..];
    }

    var result: u16 = 0;
    for (buf, 0..) |c, i| {
        const digit = std.fmt.charToDigit(c, radix) catch {
            // First digit must be valid
            if (i == 0) return error.InvalidLanguageId;
            // On any later invalid digit for the radix, just stop parsing but don't fail
            break;
        };
        // Wrapping arithmetic is intentional, see the doc comment
        result = (result *% radix) +% digit;
    }

    return if (is_negative) 0 -% result else result;
}
|
||||
|
||||
test parseInt {
|
||||
try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16"));
|
||||
try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A"));
|
||||
try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz"));
|
||||
try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1"));
|
||||
try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16"));
|
||||
try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100"));
|
||||
try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001"));
|
||||
try std.testing.expectError(error.InvalidLanguageId, parseInt("--1"));
|
||||
try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha"));
|
||||
try std.testing.expectError(error.InvalidLanguageId, parseInt("¹"));
|
||||
try std.testing.expectError(error.InvalidLanguageId, parseInt("~1"));
|
||||
}
|
||||
|
||||
/// This function is specific to how the Win32 RC command line interprets
/// language tags: invalid tags are rejected, but tags that don't have
/// a specific assigned ID but are otherwise valid enough will get
/// converted to an ID of LOCALE_CUSTOM_UNSPECIFIED.
pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
    return @intFromEnum(id);
}
|
||||
|
||||
/// Looks up the `LanguageId` assigned to the language tag `tag`.
///
/// Returns `error.InvalidLanguageTag` if `parse` rejects the tag, and `null` if the
/// tag is valid but has no assigned ID. The lookup ignores case and treats `-` and
/// `_` as equivalent.
pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
    const parsed = try parse(tag);
    // There are currently no language tags with assigned IDs that have
    // multiple suffixes, so we can skip the lookup.
    if (parsed.multiple_suffixes) return null;

    const longest_known_tag = comptime blk: {
        var longest = 0;
        for (@typeInfo(LanguageId).Enum.fields) |field| {
            if (field.name.len > longest) longest = field.name.len;
        }
        break :blk longest;
    };
    // If the tag is longer than the longest tag that has an assigned ID,
    // then we can skip the lookup.
    if (tag.len > longest_known_tag) return null;

    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
    // omit the suffix, but only if the tag contains a valid alternate sort order.
    const lookup_tag = if (parsed.isSuffixValidSortOrder())
        tag[0 .. tag.len - (parsed.suffix.?.len + 1)]
    else
        tag;

    var normalized_buf: [longest_known_tag]u8 = undefined;
    const normalized_tag = normalizeTag(lookup_tag, &normalized_buf);

    if (std.meta.stringToEnum(LanguageId, normalized_tag)) |id| return id;

    // special case for a tag that has been mapped to the same ID
    // twice.
    if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) return LanguageId.ff_ng;

    return null;
}
|
||||
|
||||
test tagToId {
|
||||
try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?);
|
||||
try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?);
|
||||
try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?);
|
||||
// Special case
|
||||
try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?);
|
||||
}
|
||||
|
||||
test "exhaustive tagToId" {
|
||||
inline for (@typeInfo(LanguageId).Enum.fields) |field| {
|
||||
const id = tagToId(field.name) catch |err| {
|
||||
std.debug.print("tag: {s}\n", .{field.name});
|
||||
return err;
|
||||
};
|
||||
try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
|
||||
std.debug.print("tag: {s}, got null\n", .{field.name});
|
||||
return error.TestExpectedEqual;
|
||||
});
|
||||
}
|
||||
var buf: [32]u8 = undefined;
|
||||
inline for (valid_alternate_sorts) |parsed_sort| {
|
||||
var fbs = std.io.fixedBufferStream(&buf);
|
||||
const writer = fbs.writer();
|
||||
writer.writeAll(parsed_sort.language_code) catch unreachable;
|
||||
writer.writeAll("-") catch unreachable;
|
||||
writer.writeAll(parsed_sort.country_code.?) catch unreachable;
|
||||
writer.writeAll("-") catch unreachable;
|
||||
writer.writeAll(parsed_sort.suffix.?) catch unreachable;
|
||||
const expected_field_name = comptime field: {
|
||||
var name_buf: [5]u8 = undefined;
|
||||
std.mem.copy(u8, &name_buf, parsed_sort.language_code);
|
||||
name_buf[2] = '_';
|
||||
std.mem.copy(u8, name_buf[3..], parsed_sort.country_code.?);
|
||||
break :field name_buf;
|
||||
};
|
||||
const expected = @field(LanguageId, &expected_field_name);
|
||||
const id = tagToId(fbs.getWritten()) catch |err| {
|
||||
std.debug.print("tag: {s}\n", .{fbs.getWritten()});
|
||||
return err;
|
||||
};
|
||||
try std.testing.expectEqual(expected, id orelse {
|
||||
std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected });
|
||||
return error.TestExpectedEqual;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a normalized copy of `tag` into `buf` and returns the written portion:
/// every `-` becomes `_` and all other bytes are ASCII-lowercased.
/// `buf` must be at least as long as `tag`.
fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
    std.debug.assert(buf.len >= tag.len);
    const normalized = buf[0..tag.len];
    for (tag, normalized) |c, *out| {
        out.* = if (c == '-') '_' else std.ascii.toLower(c);
    }
    return normalized;
}
|
||||
|
||||
/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
|
||||
/// "When an LCID is requested for a locale without a
|
||||
/// permanent LCID assignment, nor a temporary
|
||||
/// assignment as above, the protocol will respond
|
||||
/// with LOCALE_CUSTOM_UNSPECIFIED for all such
|
||||
/// locales. Because this single value is used for
|
||||
/// numerous possible locale names, it is impossible to
|
||||
/// round trip this locale, even temporarily.
|
||||
/// Applications should discard this value as soon as
|
||||
/// possible and never persist it. If the system is
|
||||
/// forced to respond to a request for
|
||||
/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
|
||||
/// the current user locale. This is often incorrect but
|
||||
/// may prevent an application or component from
|
||||
/// failing. As the meaning of this temporary LCID is
|
||||
/// unstable, it should never be used for interchange
|
||||
/// or persisted data. This is a 1-to-many relationship
|
||||
/// that is very unstable."
|
||||
/// Language ID given to any locale that has neither a permanent nor a temporary
/// LCID assignment (see the quoted text above); it cannot be round-tripped.
pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;
|
||||
|
||||
/// Primary language ID for English (same value as `LanguageId.en`, 0x0009).
pub const LANG_ENGLISH = 0x09;
|
||||
/// Sublanguage ID for United States English; `LanguageId.en_us` (0x0409) carries
/// this value in the bits above its 10-bit primary language ID.
pub const SUBLANG_ENGLISH_US = 0x01;
|
||||
|
||||
/// Combines a primary language ID and a sublanguage ID into a 16-bit language ID.
///
/// The primary language ID occupies the low 10 bits and the sublanguage ID the
/// high 6 bits, e.g. `MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US)` is `0x0409`,
/// which is the value of `LanguageId.en_us`.
///
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
    // Widen before shifting so that none of the sublanguage bits are lost.
    return (@as(u16, sublang) << 10) | primary;
}
|
||||
|
||||
/// Splits a language tag into its parts. Nothing is allocated; every slice in
/// the returned `Parsed` points into `lang_tag`.
///
/// Parts are separated by `-` or `_`. Rough approximation of the accepted
/// format as a regular expression:
///
///     [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
///         lang    |     script      |     country     |       suffix
///
/// Rules implemented here:
/// - A 1-char lang code must be followed by another part. That part is treated
///   as a country code if it is exactly two letters; otherwise it is parsed
///   with the suffix rules (so e.g. `a-0` and `a-00000000` are accepted).
/// - Directly after a longer lang code (no script tag, no country code), a
///   suffix is only accepted if it is exactly three digits, or if the lang is
///   `qps` and the suffix is `ploca` or `plocm`.
/// - Directly after a script tag, only a country code is accepted.
/// - Once the script tag/country code have been handled, any number of suffix
///   parts (1-8 alphanumeric chars each) may follow, e.g.
///   `en-us-blah-blah1-blah2-blah3`. Only the first is stored; any further one
///   sets `multiple_suffixes`, so `ca-es-valencia` and `ca-es-valencia-blah`
///   parse to different results.
///
/// Anything else yields `error.InvalidLanguageTag`.
pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
    var parts = std.mem.splitAny(u8, lang_tag, "-_");

    const lang_code = parts.first();
    if (lang_code.len < 1 or lang_code.len > 3 or !isAllAlphabetic(lang_code)) {
        return error.InvalidLanguageTag;
    }
    var result = Parsed{ .language_code = lang_code };
    const single_char_lang = lang_code.len == 1;

    // The second part could be a script tag, a country code, or a suffix.
    const second = parts.next() orelse {
        // A 1-len lang code with nothing after it is malformed.
        if (single_char_lang) return error.InvalidLanguageTag;
        return result;
    };
    const second_is_two_letters = second.len == 2 and isAllAlphabetic(second);

    if (single_char_lang) {
        // A 1-len lang code behaves strangely, so it is fully special cased.
        // This is almost certainly not the 'right' way to do this, but there is
        // no known method to determine how exactly these tags are parsed, and it
        // seems like suffix rules apply generally (digits allowed, length 1-8).
        //
        // However, `x-iv-mathan` needs to be looked up normally without
        // `multiple_suffixes` being set, so a two-letter alphabetic part is
        // still treated as a country code.
        if (second_is_two_letters) {
            result.country_code = second;
        } else if (second.len >= 1 and second.len <= 8 and isAllAlphanumeric(second)) {
            result.suffix = second;
        } else {
            return error.InvalidLanguageTag;
        }
    } else if (second.len == 4 and isAllAlphabetic(second)) {
        result.script_tag = second;
    } else if (second_is_two_letters) {
        result.country_code = second;
    } else if (second.len == 3 and isAllNumeric(second)) {
        // Only a 3-len numeric suffix is allowed as the second part of a tag...
        result.suffix = second;
    } else {
        // ...with the exception of qps-ploca and qps-plocm.
        const is_qps_pseudo_locale = std.ascii.eqlIgnoreCase(lang_code, "qps") and
            (std.ascii.eqlIgnoreCase(second, "ploca") or
            std.ascii.eqlIgnoreCase(second, "plocm"));
        if (!is_qps_pseudo_locale) return error.InvalidLanguageTag;
        result.suffix = second;
    }

    if (result.script_tag != null) {
        const after_script = parts.next() orelse return result;
        // A suffix is not allowed when a country code is not present.
        if (after_script.len != 2 or !isAllAlphabetic(after_script)) {
            return error.InvalidLanguageTag;
        }
        result.country_code = after_script;
    }

    // Any potential script tag/country code has been consumed, so everything
    // that remains is a suffix part.
    while (parts.next()) |extra| {
        const is_valid_suffix = extra.len >= 1 and extra.len <= 8 and isAllAlphanumeric(extra);
        if (!is_valid_suffix) return error.InvalidLanguageTag;

        if (result.suffix == null) {
            result.suffix = extra;
        } else {
            // No early return: every remaining part must still be validated,
            // e.g. `en-us-suffix-a-b-c-!!!` is rejected because of `!!!`.
            result.multiple_suffixes = true;
        }
    }
    return result;
}
|
||||
|
||||
/// The parts of a language tag, as produced by `parse`.
pub const Parsed = struct {
    language_code: []const u8,
    script_tag: ?[]const u8 = null,
    country_code: ?[]const u8 = null,
    /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
    suffix: ?[]const u8 = null,
    /// Whether more than one suffix part was present. The extra parts are not
    /// stored; knowing that they exist is enough to tell e.g.
    /// `ca-es-valencia-blah` apart from `ca-es-valencia`, and a bool avoids
    /// both dynamic allocation and a limit on the number of suffixes.
    multiple_suffixes: bool = false,

    /// Returns true if this tag consists of exactly a language code, a country
    /// code and a single suffix (no script tag, no further suffixes), and that
    /// combination matches an entry of `valid_alternate_sorts`, ignoring ASCII
    /// case.
    pub fn isSuffixValidSortOrder(self: Parsed) bool {
        const country = self.country_code orelse return false;
        const suffix = self.suffix orelse return false;
        if (self.script_tag != null or self.multiple_suffixes) return false;

        for (valid_alternate_sorts) |candidate| {
            if (!std.ascii.eqlIgnoreCase(candidate.language_code, self.language_code)) continue;
            if (!std.ascii.eqlIgnoreCase(candidate.country_code.?, country)) continue;
            if (std.ascii.eqlIgnoreCase(candidate.suffix.?, suffix)) return true;
        }
        return false;
    }
};

/// Language/country/suffix combinations where the suffix names an alternate sort.
/// Every entry must have both `country_code` and `suffix` set.
///
/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
const valid_alternate_sorts = [_]Parsed{
    // Note: x-IV-mathan is omitted due to how lookups are implemented.
    //       This table is used to make e.g. `de-de_phoneb` get looked up
    //       as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
    //       instead needs to be looked up with the suffix included because
    //       `x-iv` is not a tag with an assigned ID.
    .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
    .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
    .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
    .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
    .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
    .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
    .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
};
|
||||
|
||||
test "parse" {
    const Case = struct {
        tag: []const u8,
        expected: Parsed,
    };

    // Tags that must parse successfully, along with the expected parts.
    const valid_cases = [_]Case{
        .{ .tag = "en", .expected = .{ .language_code = "en" } },
        .{ .tag = "en-us", .expected = .{ .language_code = "en", .country_code = "us" } },
        .{ .tag = "en-123", .expected = .{ .language_code = "en", .suffix = "123" } },
        .{ .tag = "en-123-blah", .expected = .{
            .language_code = "en",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "en-us_123-blah", .expected = .{
            .language_code = "en",
            .country_code = "us",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "eng-Latn", .expected = .{ .language_code = "eng", .script_tag = "Latn" } },
        .{ .tag = "ff-Latn-NG", .expected = .{
            .language_code = "ff",
            .script_tag = "Latn",
            .country_code = "NG",
        } },
        .{ .tag = "qps-Plocm", .expected = .{ .language_code = "qps", .suffix = "Plocm" } },
        .{ .tag = "qps-ploca", .expected = .{ .language_code = "qps", .suffix = "ploca" } },
        .{ .tag = "x-IV-mathan", .expected = .{
            .language_code = "x",
            .country_code = "IV",
            .suffix = "mathan",
        } },
        .{ .tag = "a-a", .expected = .{ .language_code = "a", .suffix = "a" } },
        .{ .tag = "a-000", .expected = .{ .language_code = "a", .suffix = "000" } },
        .{ .tag = "a-00000000", .expected = .{ .language_code = "a", .suffix = "00000000" } },
    };
    for (valid_cases) |case| {
        // Report which tag was being parsed if the expectation fails.
        errdefer std.debug.print("tag: {s}\n", .{case.tag});
        try std.testing.expectEqualDeep(case.expected, try parse(case.tag));
    }

    // Tags that must be rejected.
    const invalid_tags = [_][]const u8{
        // suffix not allowed if script tag is present without country code
        "eng-Latn-suffix",
        // suffix must be 3 numeric digits if neither script tag nor country code is present
        "eng-suffix",
        "en-plocm",
        // 1-len lang code is not allowed if it's the only part
        "e",
    };
    for (invalid_tags) |tag| {
        errdefer std.debug.print("tag: {s}\n", .{tag});
        try std.testing.expectError(error.InvalidLanguageTag, parse(tag));
    }
}
|
||||
|
||||
/// Returns true if every byte of `str` is an ASCII letter.
/// An empty `str` yields true.
fn isAllAlphabetic(str: []const u8) bool {
    return for (str) |byte| {
        if (!std.ascii.isAlphabetic(byte)) break false;
    } else true;
}
|
||||
|
||||
/// Returns true if every byte of `str` is an ASCII letter or digit.
/// An empty `str` yields true.
fn isAllAlphanumeric(str: []const u8) bool {
    return for (str) |byte| {
        if (!std.ascii.isAlphanumeric(byte)) break false;
    } else true;
}
|
||||
|
||||
/// Returns true if every byte of `str` is an ASCII digit.
/// An empty `str` yields true.
fn isAllNumeric(str: []const u8) bool {
    return for (str) |byte| {
        if (!std.ascii.isDigit(byte)) break false;
    } else true;
}
|
||||
|
||||
/// Language tags and their assigned language IDs. Each tag is normalized
/// (lowercased, every `-` replaced with `_`) so that it is usable as a field name.
///
/// Derived from the Language / Language ID / Language Tag table in Appendix A of
/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// (Protocol Revision: 15.0), with the following differences:
/// - All rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED) were removed.
/// - Two tags in the table map to the same ID, so one of them is omitted here
///   and must be special cased during lookup to map to the ID ff_ng / 0x0467:
///     ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria
/// - x_iv_mathan has been added. It is not in the table, but it does appear in
///   the Alternate sorts table as 0x007F (LANG_INVARIANT).
pub const LanguageId = enum(u16) {
    // Language tag = Language ID, // Language, Location (or type)
    af = 0x0036, // Afrikaans
    af_za = 0x0436, // Afrikaans, South Africa
    sq = 0x001C, // Albanian
    sq_al = 0x041C, // Albanian, Albania
    gsw = 0x0084, // Alsatian
    gsw_fr = 0x0484, // Alsatian, France
    am = 0x005E, // Amharic
    am_et = 0x045E, // Amharic, Ethiopia
    ar = 0x0001, // Arabic
    ar_dz = 0x1401, // Arabic, Algeria
    ar_bh = 0x3C01, // Arabic, Bahrain
    ar_eg = 0x0C01, // Arabic, Egypt
    ar_iq = 0x0801, // Arabic, Iraq
    ar_jo = 0x2C01, // Arabic, Jordan
    ar_kw = 0x3401, // Arabic, Kuwait
    ar_lb = 0x3001, // Arabic, Lebanon
    ar_ly = 0x1001, // Arabic, Libya
    ar_ma = 0x1801, // Arabic, Morocco
    ar_om = 0x2001, // Arabic, Oman
    ar_qa = 0x4001, // Arabic, Qatar
    ar_sa = 0x0401, // Arabic, Saudi Arabia
    ar_sy = 0x2801, // Arabic, Syria
    ar_tn = 0x1C01, // Arabic, Tunisia
    ar_ae = 0x3801, // Arabic, U.A.E.
    ar_ye = 0x2401, // Arabic, Yemen
    hy = 0x002B, // Armenian
    hy_am = 0x042B, // Armenian, Armenia
    as = 0x004D, // Assamese
    as_in = 0x044D, // Assamese, India
    az_cyrl = 0x742C, // Azerbaijani (Cyrillic)
    az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan
    az = 0x002C, // Azerbaijani (Latin)
    az_latn = 0x782C, // Azerbaijani (Latin)
    az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan
    bn = 0x0045, // Bangla
    bn_bd = 0x0845, // Bangla, Bangladesh
    bn_in = 0x0445, // Bangla, India
    ba = 0x006D, // Bashkir
    ba_ru = 0x046D, // Bashkir, Russia
    eu = 0x002D, // Basque
    eu_es = 0x042D, // Basque, Spain
    be = 0x0023, // Belarusian
    be_by = 0x0423, // Belarusian, Belarus
    bs_cyrl = 0x641A, // Bosnian (Cyrillic)
    bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina
    bs_latn = 0x681A, // Bosnian (Latin)
    bs = 0x781A, // Bosnian (Latin)
    bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina
    br = 0x007E, // Breton
    br_fr = 0x047E, // Breton, France
    bg = 0x0002, // Bulgarian
    bg_bg = 0x0402, // Bulgarian, Bulgaria
    my = 0x0055, // Burmese
    my_mm = 0x0455, // Burmese, Myanmar
    ca = 0x0003, // Catalan
    ca_es = 0x0403, // Catalan, Spain
    tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco
    ku = 0x0092, // Central Kurdish
    ku_arab = 0x7C92, // Central Kurdish
    ku_arab_iq = 0x0492, // Central Kurdish, Iraq
    chr = 0x005C, // Cherokee
    chr_cher = 0x7C5C, // Cherokee
    chr_cher_us = 0x045C, // Cherokee, United States
    zh_hans = 0x0004, // Chinese (Simplified)
    zh = 0x7804, // Chinese (Simplified)
    zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China
    zh_sg = 0x1004, // Chinese (Simplified), Singapore
    zh_hant = 0x7C04, // Chinese (Traditional)
    zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R.
    zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R.
    zh_tw = 0x0404, // Chinese (Traditional), Taiwan
    co = 0x0083, // Corsican
    co_fr = 0x0483, // Corsican, France
    hr = 0x001A, // Croatian
    hr_hr = 0x041A, // Croatian, Croatia
    hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina
    cs = 0x0005, // Czech
    cs_cz = 0x0405, // Czech, Czech Republic
    da = 0x0006, // Danish
    da_dk = 0x0406, // Danish, Denmark
    prs = 0x008C, // Dari
    prs_af = 0x048C, // Dari, Afghanistan
    dv = 0x0065, // Divehi
    dv_mv = 0x0465, // Divehi, Maldives
    nl = 0x0013, // Dutch
    nl_be = 0x0813, // Dutch, Belgium
    nl_nl = 0x0413, // Dutch, Netherlands
    dz_bt = 0x0C51, // Dzongkha, Bhutan
    en = 0x0009, // English
    en_au = 0x0C09, // English, Australia
    en_bz = 0x2809, // English, Belize
    en_ca = 0x1009, // English, Canada
    en_029 = 0x2409, // English, Caribbean
    en_hk = 0x3C09, // English, Hong Kong
    en_in = 0x4009, // English, India
    en_ie = 0x1809, // English, Ireland
    en_jm = 0x2009, // English, Jamaica
    en_my = 0x4409, // English, Malaysia
    en_nz = 0x1409, // English, New Zealand
    en_ph = 0x3409, // English, Republic of the Philippines
    en_sg = 0x4809, // English, Singapore
    en_za = 0x1C09, // English, South Africa
    en_tt = 0x2C09, // English, Trinidad and Tobago
    en_ae = 0x4C09, // English, United Arab Emirates
    en_gb = 0x0809, // English, United Kingdom
    en_us = 0x0409, // English, United States
    en_zw = 0x3009, // English, Zimbabwe
    et = 0x0025, // Estonian
    et_ee = 0x0425, // Estonian, Estonia
    fo = 0x0038, // Faroese
    fo_fo = 0x0438, // Faroese, Faroe Islands
    fil = 0x0064, // Filipino
    fil_ph = 0x0464, // Filipino, Philippines
    fi = 0x000B, // Finnish
    fi_fi = 0x040B, // Finnish, Finland
    fr = 0x000C, // French
    fr_be = 0x080C, // French, Belgium
    fr_cm = 0x2C0C, // French, Cameroon
    fr_ca = 0x0C0C, // French, Canada
    fr_029 = 0x1C0C, // French, Caribbean
    fr_cd = 0x240C, // French, Congo, DRC
    fr_ci = 0x300C, // French, Côte d'Ivoire
    fr_fr = 0x040C, // French, France
    fr_ht = 0x3C0C, // French, Haiti
    fr_lu = 0x140C, // French, Luxembourg
    fr_ml = 0x340C, // French, Mali
    fr_ma = 0x380C, // French, Morocco
    fr_mc = 0x180C, // French, Principality of Monaco
    fr_re = 0x200C, // French, Reunion
    fr_sn = 0x280C, // French, Senegal
    fr_ch = 0x100C, // French, Switzerland
    fy = 0x0062, // Frisian
    fy_nl = 0x0462, // Frisian, Netherlands
    ff = 0x0067, // Fulah
    ff_latn = 0x7C67, // Fulah (Latin)
    ff_ng = 0x0467, // Fulah, Nigeria
    ff_latn_sn = 0x0867, // Fulah, Senegal
    gl = 0x0056, // Galician
    gl_es = 0x0456, // Galician, Spain
    ka = 0x0037, // Georgian
    ka_ge = 0x0437, // Georgian, Georgia
    de = 0x0007, // German
    de_at = 0x0C07, // German, Austria
    de_de = 0x0407, // German, Germany
    de_li = 0x1407, // German, Liechtenstein
    de_lu = 0x1007, // German, Luxembourg
    de_ch = 0x0807, // German, Switzerland
    el = 0x0008, // Greek
    el_gr = 0x0408, // Greek, Greece
    kl = 0x006F, // Greenlandic
    kl_gl = 0x046F, // Greenlandic, Greenland
    gn = 0x0074, // Guarani
    gn_py = 0x0474, // Guarani, Paraguay
    gu = 0x0047, // Gujarati
    gu_in = 0x0447, // Gujarati, India
    ha = 0x0068, // Hausa (Latin)
    ha_latn = 0x7C68, // Hausa (Latin)
    ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria
    haw = 0x0075, // Hawaiian
    haw_us = 0x0475, // Hawaiian, United States
    he = 0x000D, // Hebrew
    he_il = 0x040D, // Hebrew, Israel
    hi = 0x0039, // Hindi
    hi_in = 0x0439, // Hindi, India
    hu = 0x000E, // Hungarian
    hu_hu = 0x040E, // Hungarian, Hungary
    is = 0x000F, // Icelandic
    is_is = 0x040F, // Icelandic, Iceland
    ig = 0x0070, // Igbo
    ig_ng = 0x0470, // Igbo, Nigeria
    id = 0x0021, // Indonesian
    id_id = 0x0421, // Indonesian, Indonesia
    iu = 0x005D, // Inuktitut (Latin)
    iu_latn = 0x7C5D, // Inuktitut (Latin)
    iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada
    iu_cans = 0x785D, // Inuktitut (Syllabics)
    iu_cans_ca = 0x045D, // Inuktitut (Syllabics), Canada
    ga = 0x003C, // Irish
    ga_ie = 0x083C, // Irish, Ireland
    it = 0x0010, // Italian
    it_it = 0x0410, // Italian, Italy
    it_ch = 0x0810, // Italian, Switzerland
    ja = 0x0011, // Japanese
    ja_jp = 0x0411, // Japanese, Japan
    kn = 0x004B, // Kannada
    kn_in = 0x044B, // Kannada, India
    kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria
    ks = 0x0060, // Kashmiri
    ks_arab = 0x0460, // Kashmiri, Perso-Arabic
    ks_deva_in = 0x0860, // Kashmiri (Devanagari), India
    kk = 0x003F, // Kazakh
    kk_kz = 0x043F, // Kazakh, Kazakhstan
    km = 0x0053, // Khmer
    km_kh = 0x0453, // Khmer, Cambodia
    quc = 0x0086, // K'iche
    quc_latn_gt = 0x0486, // K'iche, Guatemala
    rw = 0x0087, // Kinyarwanda
    rw_rw = 0x0487, // Kinyarwanda, Rwanda
    sw = 0x0041, // Kiswahili
    sw_ke = 0x0441, // Kiswahili, Kenya
    kok = 0x0057, // Konkani
    kok_in = 0x0457, // Konkani, India
    ko = 0x0012, // Korean
    ko_kr = 0x0412, // Korean, Korea
    ky = 0x0040, // Kyrgyz
    ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan
    lo = 0x0054, // Lao
    lo_la = 0x0454, // Lao, Lao P.D.R.
    la_va = 0x0476, // Latin, Vatican City
    lv = 0x0026, // Latvian
    lv_lv = 0x0426, // Latvian, Latvia
    lt = 0x0027, // Lithuanian
    lt_lt = 0x0427, // Lithuanian, Lithuania
    dsb = 0x7C2E, // Lower Sorbian
    dsb_de = 0x082E, // Lower Sorbian, Germany
    lb = 0x006E, // Luxembourgish
    lb_lu = 0x046E, // Luxembourgish, Luxembourg
    mk = 0x002F, // Macedonian
    mk_mk = 0x042F, // Macedonian, North Macedonia
    ms = 0x003E, // Malay
    ms_bn = 0x083E, // Malay, Brunei Darussalam
    ms_my = 0x043E, // Malay, Malaysia
    ml = 0x004C, // Malayalam
    ml_in = 0x044C, // Malayalam, India
    mt = 0x003A, // Maltese
    mt_mt = 0x043A, // Maltese, Malta
    mi = 0x0081, // Maori
    mi_nz = 0x0481, // Maori, New Zealand
    arn = 0x007A, // Mapudungun
    arn_cl = 0x047A, // Mapudungun, Chile
    mr = 0x004E, // Marathi
    mr_in = 0x044E, // Marathi, India
    moh = 0x007C, // Mohawk
    moh_ca = 0x047C, // Mohawk, Canada
    mn = 0x0050, // Mongolian (Cyrillic)
    mn_cyrl = 0x7850, // Mongolian (Cyrillic)
    mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia
    mn_mong = 0x7C50, // Mongolian (Traditional Mongolian)
    mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China
    mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia
    ne = 0x0061, // Nepali
    ne_in = 0x0861, // Nepali, India
    ne_np = 0x0461, // Nepali, Nepal
    no = 0x0014, // Norwegian (Bokmal)
    nb = 0x7C14, // Norwegian (Bokmal)
    nb_no = 0x0414, // Norwegian (Bokmal), Norway
    nn = 0x7814, // Norwegian (Nynorsk)
    nn_no = 0x0814, // Norwegian (Nynorsk), Norway
    oc = 0x0082, // Occitan
    oc_fr = 0x0482, // Occitan, France
    @"or" = 0x0048, // Odia
    or_in = 0x0448, // Odia, India
    om = 0x0072, // Oromo
    om_et = 0x0472, // Oromo, Ethiopia
    ps = 0x0063, // Pashto
    ps_af = 0x0463, // Pashto, Afghanistan
    fa = 0x0029, // Persian
    fa_ir = 0x0429, // Persian, Iran
    pl = 0x0015, // Polish
    pl_pl = 0x0415, // Polish, Poland
    pt = 0x0016, // Portuguese
    pt_br = 0x0416, // Portuguese, Brazil
    pt_pt = 0x0816, // Portuguese, Portugal
    qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing
    qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing
    qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales
    pa = 0x0046, // Punjabi
    pa_arab = 0x7C46, // Punjabi
    pa_in = 0x0446, // Punjabi, India
    pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan
    quz = 0x006B, // Quechua
    quz_bo = 0x046B, // Quechua, Bolivia
    quz_ec = 0x086B, // Quechua, Ecuador
    quz_pe = 0x0C6B, // Quechua, Peru
    ro = 0x0018, // Romanian
    ro_md = 0x0818, // Romanian, Moldova
    ro_ro = 0x0418, // Romanian, Romania
    rm = 0x0017, // Romansh
    rm_ch = 0x0417, // Romansh, Switzerland
    ru = 0x0019, // Russian
    ru_md = 0x0819, // Russian, Moldova
    ru_ru = 0x0419, // Russian, Russia
    sah = 0x0085, // Sakha
    sah_ru = 0x0485, // Sakha, Russia
    smn = 0x703B, // Sami (Inari)
    smn_fi = 0x243B, // Sami (Inari), Finland
    smj = 0x7C3B, // Sami (Lule)
    smj_no = 0x103B, // Sami (Lule), Norway
    smj_se = 0x143B, // Sami (Lule), Sweden
    se = 0x003B, // Sami (Northern)
    se_fi = 0x0C3B, // Sami (Northern), Finland
    se_no = 0x043B, // Sami (Northern), Norway
    se_se = 0x083B, // Sami (Northern), Sweden
    sms = 0x743B, // Sami (Skolt)
    sms_fi = 0x203B, // Sami (Skolt), Finland
    sma = 0x783B, // Sami (Southern)
    sma_no = 0x183B, // Sami (Southern), Norway
    sma_se = 0x1C3B, // Sami (Southern), Sweden
    sa = 0x004F, // Sanskrit
    sa_in = 0x044F, // Sanskrit, India
    gd = 0x0091, // Scottish Gaelic
    gd_gb = 0x0491, // Scottish Gaelic, United Kingdom
    sr_cyrl = 0x6C1A, // Serbian (Cyrillic)
    sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina
    sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro
    sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia
    sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former)
    sr_latn = 0x701A, // Serbian (Latin)
    sr = 0x7C1A, // Serbian (Latin)
    sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina
    sr_latn_me = 0x2C1A, // Serbian (Latin), Montenegro
    sr_latn_rs = 0x241A, // Serbian (Latin), Serbia
    sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former)
    nso = 0x006C, // Sesotho sa Leboa
    nso_za = 0x046C, // Sesotho sa Leboa, South Africa
    tn = 0x0032, // Setswana
    tn_bw = 0x0832, // Setswana, Botswana
    tn_za = 0x0432, // Setswana, South Africa
    sd = 0x0059, // Sindhi
    sd_arab = 0x7C59, // Sindhi
    sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan
    si = 0x005B, // Sinhala
    si_lk = 0x045B, // Sinhala, Sri Lanka
    sk = 0x001B, // Slovak
    sk_sk = 0x041B, // Slovak, Slovakia
    sl = 0x0024, // Slovenian
    sl_si = 0x0424, // Slovenian, Slovenia
    so = 0x0077, // Somali
    so_so = 0x0477, // Somali, Somalia
    st = 0x0030, // Sotho
    st_za = 0x0430, // Sotho, South Africa
    es = 0x000A, // Spanish
    es_ar = 0x2C0A, // Spanish, Argentina
    es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela
    es_bo = 0x400A, // Spanish, Bolivia
    es_cl = 0x340A, // Spanish, Chile
    es_co = 0x240A, // Spanish, Colombia
    es_cr = 0x140A, // Spanish, Costa Rica
    es_cu = 0x5C0A, // Spanish, Cuba
    es_do = 0x1C0A, // Spanish, Dominican Republic
    es_ec = 0x300A, // Spanish, Ecuador
    es_sv = 0x440A, // Spanish, El Salvador
    es_gt = 0x100A, // Spanish, Guatemala
    es_hn = 0x480A, // Spanish, Honduras
    es_419 = 0x580A, // Spanish, Latin America
    es_mx = 0x080A, // Spanish, Mexico
    es_ni = 0x4C0A, // Spanish, Nicaragua
    es_pa = 0x180A, // Spanish, Panama
    es_py = 0x3C0A, // Spanish, Paraguay
    es_pe = 0x280A, // Spanish, Peru
    es_pr = 0x500A, // Spanish, Puerto Rico
    es_es_tradnl = 0x040A, // Spanish, Spain
    es_es = 0x0C0A, // Spanish, Spain
    es_us = 0x540A, // Spanish, United States
    es_uy = 0x380A, // Spanish, Uruguay
    sv = 0x001D, // Swedish
    sv_fi = 0x081D, // Swedish, Finland
    sv_se = 0x041D, // Swedish, Sweden
    syr = 0x005A, // Syriac
    syr_sy = 0x045A, // Syriac, Syria
    tg = 0x0028, // Tajik (Cyrillic)
    tg_cyrl = 0x7C28, // Tajik (Cyrillic)
    tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan
    tzm = 0x005F, // Tamazight (Latin)
    tzm_latn = 0x7C5F, // Tamazight (Latin)
    tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria
    ta = 0x0049, // Tamil
    ta_in = 0x0449, // Tamil, India
    ta_lk = 0x0849, // Tamil, Sri Lanka
    tt = 0x0044, // Tatar
    tt_ru = 0x0444, // Tatar, Russia
    te = 0x004A, // Telugu
    te_in = 0x044A, // Telugu, India
    th = 0x001E, // Thai
    th_th = 0x041E, // Thai, Thailand
    bo = 0x0051, // Tibetan
    bo_cn = 0x0451, // Tibetan, People's Republic of China
    ti = 0x0073, // Tigrinya
    ti_er = 0x0873, // Tigrinya, Eritrea
    ti_et = 0x0473, // Tigrinya, Ethiopia
    ts = 0x0031, // Tsonga
    ts_za = 0x0431, // Tsonga, South Africa
    tr = 0x001F, // Turkish
    tr_tr = 0x041F, // Turkish, Turkey
    tk = 0x0042, // Turkmen
    tk_tm = 0x0442, // Turkmen, Turkmenistan
    uk = 0x0022, // Ukrainian
    uk_ua = 0x0422, // Ukrainian, Ukraine
    hsb = 0x002E, // Upper Sorbian
    hsb_de = 0x042E, // Upper Sorbian, Germany
    ur = 0x0020, // Urdu
    ur_in = 0x0820, // Urdu, India
    ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan
    ug = 0x0080, // Uyghur
    ug_cn = 0x0480, // Uyghur, People's Republic of China
    uz_cyrl = 0x7843, // Uzbek (Cyrillic)
    uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan
    uz = 0x0043, // Uzbek (Latin)
    uz_latn = 0x7C43, // Uzbek (Latin)
    uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan
    ca_es_valencia = 0x0803, // Valencian, Spain
    ve = 0x0033, // Venda
    ve_za = 0x0433, // Venda, South Africa
    vi = 0x002A, // Vietnamese
    vi_vn = 0x042A, // Vietnamese, Vietnam
    cy = 0x0052, // Welsh
    cy_gb = 0x0452, // Welsh, United Kingdom
    wo = 0x0088, // Wolof
    wo_sn = 0x0488, // Wolof, Senegal
    xh = 0x0034, // Xhosa
    xh_za = 0x0434, // Xhosa, South Africa
    ii = 0x0078, // Yi
    ii_cn = 0x0478, // Yi, People's Republic of China
    yi_001 = 0x043D, // Yiddish, World
    yo = 0x006A, // Yoruba
    yo_ng = 0x046A, // Yoruba, Nigeria
    zu = 0x0035, // Zulu
    zu_za = 0x0435, // Zulu, South Africa

    /// Special case
    x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting"
};
|
||||
1104
src/resinator/lex.zig
Normal file
1104
src/resinator/lex.zig
Normal file
File diff suppressed because it is too large
Load Diff
904
src/resinator/literals.zig
Normal file
904
src/resinator/literals.zig
Normal file
@ -0,0 +1,904 @@
|
||||
const std = @import("std");
|
||||
const code_pages = @import("code_pages.zig");
|
||||
const CodePage = code_pages.CodePage;
|
||||
const windows1252 = @import("windows1252.zig");
|
||||
const ErrorDetails = @import("errors.zig").ErrorDetails;
|
||||
const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
|
||||
const Token = @import("lex.zig").Token;
|
||||
|
||||
/// Returns true if `str` is acceptable as a number literal for data values.
/// rc is maximally liberal here: only the first byte is inspected, and it is
/// enough for it to be a digit, `-`, or `~`. An empty `str` is never valid.
pub fn isValidNumberDataLiteral(str: []const u8) bool {
    if (str.len == 0) return false;
    return switch (str[0]) {
        '0'...'9', '-', '~' => true,
        else => false,
    };
}
|
||||
|
||||
/// A slice of bytes paired with a `CodePage`.
/// NOTE(review): presumably `code_page` is the code page that `slice` should be
/// interpreted with — confirm against callers.
pub const SourceBytes = struct {
|
||||
slice: []const u8,
|
||||
code_page: CodePage,
|
||||
};
|
||||
|
||||
/// Kind of string literal. `IterativeStringParser.init` selects `.wide` when the
/// source bytes start with `L` or `l`.
pub const StringType = enum { ascii, wide };
|
||||
|
||||
/// Valid escapes:
|
||||
/// "" -> "
|
||||
/// \a, \A => 0x08 (not 0x07 like in C)
|
||||
/// \n => 0x0A
|
||||
/// \r => 0x0D
|
||||
/// \t, \T => 0x09
|
||||
/// \\ => \
|
||||
/// \nnn => byte with numeric value given by nnn interpreted as octal
|
||||
/// (wraps on overflow, number of digits can be 1-3 for ASCII strings
|
||||
/// and 1-7 for wide strings)
|
||||
/// \xhh => byte with numeric value given by hh interpreted as hex
|
||||
/// (number of digits can be 0-2 for ASCII strings and 0-4 for
|
||||
/// wide strings)
|
||||
/// \<\r+> => \
|
||||
/// \<[\r\n\t ]+> => <nothing>
|
||||
///
|
||||
/// Special cases:
|
||||
/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself
|
||||
/// <\r> => <nothing>
|
||||
/// <\n+><\w+?\n?> => <space><\n>
|
||||
///
|
||||
/// Special, especially weird case:
|
||||
/// \"" => "
|
||||
/// NOTE: This leads to footguns because the preprocessor can start parsing things
|
||||
/// out-of-sync with the RC compiler, expanding macros within string literals, etc.
|
||||
/// This parse function handles this case the same as the Windows RC compiler, but
|
||||
/// \" within a string literal is treated as an error by the lexer, so the relevant
|
||||
/// branches should never actually be hit during this function.
|
||||
pub const IterativeStringParser = struct {
|
||||
source: []const u8,
|
||||
code_page: CodePage,
|
||||
/// The type of the string inferred by the prefix (L"" or "")
|
||||
/// This is what matters for things like the maximum digits in an
|
||||
/// escape sequence, whether or not invalid escape sequences are skipped, etc.
|
||||
declared_string_type: StringType,
|
||||
pending_codepoint: ?u21 = null,
|
||||
num_pending_spaces: u8 = 0,
|
||||
index: usize = 0,
|
||||
column: usize = 0,
|
||||
diagnostics: ?DiagnosticsContext = null,
|
||||
seen_tab: bool = false,
|
||||
|
||||
const State = enum {
|
||||
normal,
|
||||
quote,
|
||||
newline,
|
||||
escaped,
|
||||
escaped_cr,
|
||||
escaped_newlines,
|
||||
escaped_octal,
|
||||
escaped_hex,
|
||||
};
|
||||
|
||||
/// Creates a parser over the contents of a quoted string literal.
/// `bytes.slice` must be the whole literal: an optional `L`/`l` prefix, the
/// opening `"`, the contents, and the closing `"`. The starting column is
/// `options.start_column` advanced past the prefix and opening quote.
pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
    const is_wide = bytes.slice[0] == 'L' or bytes.slice[0] == 'l';
    const declared: StringType = if (is_wide) .wide else .ascii;
    // Leading bytes that are not string content: the opening quote, plus
    // the L prefix for wide strings.
    const prefix_len: usize = if (is_wide) 2 else 1;
    return .{
        // The trailing `- 1` drops the closing quote.
        .source = bytes.slice[prefix_len .. bytes.slice.len - 1],
        .code_page = bytes.code_page,
        .declared_string_type = declared,
        .column = options.start_column + prefix_len,
        .diagnostics = options.diagnostics,
    };
}
|
||||
|
||||
/// One unit of output produced by the parser.
pub const ParsedCodepoint = struct {
|
||||
/// The parsed value: either a decoded codepoint or the numeric value of an
/// octal/hex escape sequence.
codepoint: u21,
|
||||
/// True only when `codepoint` holds the value of an octal or hex escape
/// sequence rather than a codepoint taken from the source text.
from_escaped_integer: bool = false,
|
||||
};
|
||||
|
||||
/// Returns the next parsed codepoint, or null once the string is exhausted.
/// When diagnostics are attached, a warning plus an accompanying note is
/// appended for codepoints that the Windows RC compiler would miscompile
/// (skipped or byte-swapped). Values that came from an octal/hex escape
/// are never warned about.
pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
    const maybe_parsed = try self.nextUnchecked();
    const parsed = maybe_parsed orelse return null;
    if (self.diagnostics == null) return maybe_parsed;
    if (parsed.from_escaped_integer) return maybe_parsed;

    const err: ErrorDetails.Error = switch (parsed.codepoint) {
        0xD00 => .rc_would_miscompile_codepoint_skip,
        0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE => .rc_would_miscompile_codepoint_byte_swap,
        else => return maybe_parsed,
    };

    const ctx = &self.diagnostics.?;
    try ctx.diagnostics.append(ErrorDetails{
        .err = err,
        .type = .warning,
        .token = ctx.token,
        .extra = .{ .number = parsed.codepoint },
    });
    try ctx.diagnostics.append(ErrorDetails{
        .err = err,
        .type = .note,
        .token = ctx.token,
        .print_source_line = false,
        .extra = .{ .number = parsed.codepoint },
    });
    return maybe_parsed;
}
|
||||
|
||||
/// Builds the result for a finished (or cut short) octal/hex escape.
/// ASCII strings keep only the low 8 bits of `value`; wide strings keep all 16.
fn escapedInteger(self: *const IterativeStringParser, value: u16) ParsedCodepoint {
    const kept: u16 = switch (self.declared_string_type) {
        .ascii => @as(u8, @truncate(value)),
        .wide => value,
    };
    return .{ .codepoint = kept, .from_escaped_integer = true };
}

/// Returns the next parsed codepoint, or null once the string is exhausted,
/// without the "rc would miscompile" checks performed by `next`.
/// The tab-to-spaces warning is still emitted (once per string) when
/// diagnostics are attached.
pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
    // Drain anything queued by a previous call before reading more source.
    if (self.num_pending_spaces > 0) {
        // Pending spaces and a pending codepoint must never coexist, so the
        // order in which pending output is drained cannot matter.
        std.debug.assert(self.pending_codepoint == null);
        self.num_pending_spaces -= 1;
        return .{ .codepoint = ' ' };
    }
    if (self.pending_codepoint) |queued| {
        self.pending_codepoint = null;
        return .{ .codepoint = queued };
    }
    if (self.index >= self.source.len) return null;

    const is_wide = self.declared_string_type == .wide;
    const max_octal_digits: u8 = if (is_wide) 7 else 3;
    const max_hex_digits: u8 = if (is_wide) 4 else 2;

    var state: State = .normal;
    var escape_value: u16 = 0;
    var escape_digit_count: u8 = 0;

    while (self.code_page.codepointAt(self.index, self.source)) |cp| : (self.index += cp.byte_len) {
        const c = cp.value;
        // When set, the current codepoint is handed back so that it gets
        // handled again (by the next loop iteration or the next call).
        var reprocess = false;
        defer {
            if (reprocess) {
                self.index -= cp.byte_len;
            } else if (c == '\t') {
                self.column += columnsUntilTabStop(self.column, 8);
            } else {
                self.column += cp.byte_len;
            }
        }

        switch (state) {
            .normal => switch (c) {
                '\\' => state = .escaped,
                '"' => state = .quote,
                '\r' => {},
                '\n' => state = .newline,
                '\t' => {
                    // Only warn about a tab getting converted to spaces once per string
                    if (!self.seen_tab) {
                        if (self.diagnostics) |*ctx| {
                            try ctx.diagnostics.append(ErrorDetails{
                                .err = .tab_converted_to_spaces,
                                .type = .warning,
                                .token = ctx.token,
                            });
                            try ctx.diagnostics.append(ErrorDetails{
                                .err = .tab_converted_to_spaces,
                                .type = .note,
                                .token = ctx.token,
                                .print_source_line = false,
                            });
                            self.seen_tab = true;
                        }
                    }
                    // The first space is returned now, the rest are queued.
                    const width = columnsUntilTabStop(self.column, 8);
                    self.num_pending_spaces = @intCast(width - 1);
                    self.index += cp.byte_len;
                    return .{ .codepoint = ' ' };
                },
                else => {
                    self.index += cp.byte_len;
                    return .{ .codepoint = c };
                },
            },
            .quote => {
                if (c != '"') unreachable; // this is a bug in the lexer
                // "" => "
                self.index += cp.byte_len;
                return .{ .codepoint = '"' };
            },
            .newline => switch (c) {
                '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
                else => {
                    // Hand the current char back, then emit <space><newline>.
                    reprocess = true;
                    self.index += cp.byte_len;
                    self.pending_codepoint = '\n';
                    return .{ .codepoint = ' ' };
                },
            },
            .escaped => switch (c) {
                '\r' => state = .escaped_cr,
                '\n' => state = .escaped_newlines,
                '0'...'7' => {
                    escape_value = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
                    escape_digit_count = 1;
                    state = .escaped_octal;
                },
                'x', 'X' => {
                    escape_value = 0;
                    escape_digit_count = 0;
                    state = .escaped_hex;
                },
                else => {
                    const simple_escape: ?u21 = switch (c) {
                        'a', 'A' => '\x08', // might be a bug in RC, but matches its behavior
                        'n' => '\n',
                        'r' => '\r',
                        't', 'T' => '\t',
                        '\\' => '\\',
                        else => null,
                    };
                    if (simple_escape) |value| {
                        self.index += cp.byte_len;
                        return .{ .codepoint = value };
                    }
                    if (c == '"') {
                        // \" is a special case that doesn't get the \ included,
                        // so the quote is handled again in the normal state.
                        reprocess = true;
                    } else if (!is_wide) {
                        // Unrecognized escape in an ASCII string: emit the
                        // backslash and handle the current char again.
                        reprocess = true;
                        self.index += cp.byte_len;
                        return .{ .codepoint = '\\' };
                    }
                    // Otherwise: invalid escape sequences are skipped in wide strings.
                    state = .normal;
                },
            },
            .escaped_cr => switch (c) {
                '\r' => {},
                '\n' => state = .escaped_newlines,
                else => {
                    // Hand the current char back and emit the backslash.
                    reprocess = true;
                    self.index += cp.byte_len;
                    return .{ .codepoint = '\\' };
                },
            },
            .escaped_newlines => switch (c) {
                '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
                else => {
                    // Hand the current char back so it is handled normally.
                    reprocess = true;
                    state = .normal;
                },
            },
            .escaped_octal => switch (c) {
                '0'...'7' => {
                    const digit: u16 = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
                    // Octal escapes wrap on overflow.
                    escape_value = escape_value *% 8 +% digit;
                    escape_digit_count += 1;
                    if (escape_digit_count == max_octal_digits) {
                        self.index += cp.byte_len;
                        return self.escapedInteger(escape_value);
                    }
                },
                else => {
                    // Hand the current char back and write out whatever
                    // value has been parsed so far.
                    reprocess = true;
                    self.index += cp.byte_len;
                    return self.escapedInteger(escape_value);
                },
            },
            .escaped_hex => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit: u16 = std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
                    escape_value = escape_value * 16 + digit;
                    escape_digit_count += 1;
                    if (escape_digit_count == max_hex_digits) {
                        self.index += cp.byte_len;
                        return self.escapedInteger(escape_value);
                    }
                },
                else => {
                    // Hand the current char back and write out whatever
                    // value has been parsed so far
                    // (even with 0 actual digits, \x alone parses to 0).
                    reprocess = true;
                    self.index += cp.byte_len;
                    return self.escapedInteger(escape_value);
                },
            },
        }
    }

    // The source ran out: flush whatever the final state implies.
    switch (state) {
        .normal, .escaped_newlines => return null,
        .newline => {
            // <space><newline>
            self.pending_codepoint = '\n';
            return .{ .codepoint = ' ' };
        },
        .escaped, .escaped_cr => return .{ .codepoint = '\\' },
        .escaped_octal, .escaped_hex => return self.escapedInteger(escape_value),
        .quote => unreachable, // this is a bug in the lexer
    }
}
|
||||
};
|
||||
|
||||
/// Options accepted by `IterativeStringParser.init` and the `parseQuoted*` functions.
pub const StringParseOptions = struct {
|
||||
/// Column at which the string literal starts; the parser uses it as the
/// base for its column tracking (which decides how wide a literal tab is).
start_column: usize = 0,
|
||||
/// When non-null, warnings and notes found while parsing are appended here.
diagnostics: ?DiagnosticsContext = null,
|
||||
/// Code page used by `parseQuotedString` when encoding `u8` output.
output_code_page: CodePage = .windows1252,
|
||||
};
|
||||
|
||||
/// Parses the quoted string literal in `bytes` into a newly allocated buffer.
///
/// `literal_type` selects the output encoding only: `.ascii` yields bytes in
/// `options.output_code_page`, `.wide` yields 0-terminated UTF-16LE. The
/// escape rules (maximum escape digits, whether invalid escapes are skipped)
/// follow the literal's own prefix, see `IterativeStringParser`.
///
/// Values that come from octal/hex escapes are written verbatim, truncated
/// to the width of the output element.
///
/// `bytes.slice` must contain at least the two double quotes.
/// The caller owns the returned slice and frees it with `allocator`.
pub fn parseQuotedString(
    comptime literal_type: StringType,
    allocator: std.mem.Allocator,
    bytes: SourceBytes,
    options: StringParseOptions,
) !(switch (literal_type) {
    .ascii => []u8,
    .wide => [:0]u16,
}) {
    const T = if (literal_type == .ascii) u8 else u16;
    std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars

    var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len);
    errdefer buf.deinit();

    var iterative_parser = IterativeStringParser.init(bytes, options);

    while (try iterative_parser.next()) |parsed| {
        const c = parsed.codepoint;
        if (parsed.from_escaped_integer) {
            // An L"" literal parsed into u8 output can carry an escape value
            // above 0xFF, so truncate instead of @intCast (which would trap).
            // Stored little-endian like every other append in the wide case.
            try buf.append(std.mem.nativeToLittle(T, @truncate(c)));
        } else {
            switch (literal_type) {
                .ascii => switch (options.output_code_page) {
                    .windows1252 => {
                        if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
                            try buf.append(best_fit);
                        } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
                            try buf.append('?');
                        } else {
                            // Codepoints that would need a UTF-16 surrogate
                            // pair become two question marks.
                            try buf.appendSlice("??");
                        }
                    },
                    .utf8 => {
                        // Invalid input is written as U+FFFD REPLACEMENT CHARACTER.
                        const codepoint_to_encode: u21 = if (c == code_pages.Codepoint.invalid)
                            '\u{FFFD}'
                        else
                            c;
                        var utf8_buf: [4]u8 = undefined;
                        const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable;
                        try buf.appendSlice(utf8_buf[0..utf8_len]);
                    },
                    else => unreachable, // Unsupported code page
                },
                .wide => {
                    if (c == code_pages.Codepoint.invalid) {
                        // Invalid input is written as U+FFFD REPLACEMENT CHARACTER.
                        try buf.append(std.mem.nativeToLittle(u16, '\u{FFFD}'));
                    } else if (c < 0x10000) {
                        const short: u16 = @intCast(c);
                        try buf.append(std.mem.nativeToLittle(u16, short));
                    } else {
                        // Encode as a UTF-16 surrogate pair.
                        const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
                        try buf.append(std.mem.nativeToLittle(u16, high));
                        const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
                        try buf.append(std.mem.nativeToLittle(u16, low));
                    }
                },
            }
        }
    }

    if (literal_type == .wide) {
        return buf.toOwnedSliceSentinel(0);
    } else {
        return buf.toOwnedSlice();
    }
}
|
||||
|
||||
/// Parses the quoted string literal in `bytes` to `u8` output by forwarding
/// to `parseQuotedString(.ascii, ...)`.
/// `bytes.slice` must contain at least the two double quotes.
pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
|
||||
std.debug.assert(bytes.slice.len >= 2); // ""
|
||||
return parseQuotedString(.ascii, allocator, bytes, options);
|
||||
}
|
||||
|
||||
/// Parses the `L"..."` string literal in `bytes` to 0-terminated `u16` output
/// by forwarding to `parseQuotedString(.wide, ...)`.
/// `bytes.slice` must contain at least the `L` prefix and both double quotes.
pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
|
||||
std.debug.assert(bytes.slice.len >= 3); // L""
|
||||
return parseQuotedString(.wide, allocator, bytes, options);
|
||||
}
|
||||
|
||||
/// Parses the quoted string literal in `bytes` to 0-terminated `u16` output
/// by forwarding to `parseQuotedString(.wide, ...)`. Unlike
/// `parseQuotedWideString`, only the two double quotes are required, so a
/// literal without the `L` prefix is accepted as well.
pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
|
||||
std.debug.assert(bytes.slice.len >= 2); // ""
|
||||
return parseQuotedString(.wide, allocator, bytes, options);
|
||||
}
|
||||
|
||||
/// Parses the quoted string literal in `bytes` to `u8` output by forwarding
/// to `parseQuotedString(.ascii, ...)`. The body is the same as
/// `parseQuotedAsciiString`.
/// `bytes.slice` must contain at least the two double quotes.
pub fn parseQuotedStringAsAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
|
||||
std.debug.assert(bytes.slice.len >= 2); // ""
|
||||
return parseQuotedString(.ascii, allocator, bytes, options);
|
||||
}
|
||||
|
||||
test "parse quoted ascii string" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    // Parses `source` as Windows-1252 with default options and compares the
    // result against `expected`.
    const check = struct {
        fn run(allocator: std.mem.Allocator, expected: []const u8, source: []const u8) !void {
            const actual = try parseQuotedAsciiString(allocator, .{
                .slice = source,
                .code_page = .windows1252,
            }, .{});
            try std.testing.expectEqualSlices(u8, expected, actual);
        }
    }.run;

    try check(arena, "hello",
        \\"hello"
    );
    // hex with 0 digits
    try check(arena, "\x00",
        \\"\x"
    );
    // hex max of 2 digits
    try check(arena, "\xFFf",
        \\"\XfFf"
    );
    // octal with invalid octal digit
    try check(arena, "\x019",
        \\"\19"
    );
    // escaped quotes
    try check(arena, " \" ",
        \\" "" "
    );
    // backslash right before escaped quotes
    try check(arena, "\"",
        \\"\"""
    );
    // octal overflow
    try check(arena, "\x01",
        \\"\401"
    );
    // escapes
    try check(arena, "\x08\n\r\t\\",
        \\"\a\n\r\t\\"
    );
    // uppercase escapes
    try check(arena, "\x08\\N\\R\t\\",
        \\"\A\N\R\T\\"
    );
    // backslash on its own
    try check(arena, "\\",
        \\"\"
    );
    // unrecognized escapes
    try check(arena, "\\b",
        \\"\b"
    );
    // escaped carriage returns
    try check(arena, "\\", "\"\\\r\r\r\r\r\"");
    // escaped newlines
    try check(arena, "", "\"\\\n\n\n\n\n\"");
    // escaped CRLF pairs
    try check(arena, "", "\"\\\r\n\r\n\r\n\r\n\r\n\"");
    // escaped newlines with other whitespace
    try check(arena, "", "\"\\\n \t\r\n \r\t\n \t\"");

    // literal tab characters get converted to spaces (dependent on source file columns)
    // The opening quote occupies column 0, so the string contents start at
    // column 1 and a tab pads up to the next multiple of 8. The counts are
    // written with `**` so they cannot be silently altered by whitespace
    // normalization.
    try check(arena, " " ** 7, "\"\t\""); // tab at column 1
    try check(arena, "abc" ++ (" " ** 4), "\"abc\t\""); // tab at column 4
    try check(arena, "abcdefg" ++ (" " ** 8), "\"abcdefg\t\""); // tab at column 8
    try check(arena, "\\" ++ (" " ** 6), "\"\\\t\""); // tab at column 2

    // literal CR's get dropped
    try check(arena, "", "\"\r\r\r\r\r\"");
    // contiguous newlines and whitespace get collapsed to <space><newline>
    try check(arena, " \n", "\"\n\r\r \r\n \t \"");
}
|
||||
|
||||
test "parse quoted ascii string with utf8 code page" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that don't have a Windows-1252 representation get converted to ?
    try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"кириллица\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that have a best fit mapping get converted accordingly,
    // these are box drawing codepoints
    try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
        .{},
    ));
    // Invalid UTF-8 gets converted to ? depending on well-formedness
    try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that would require a UTF-16 surrogate pair get converted to ??
    try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
        .{},
    ));

    // Output code page changes how invalid UTF-8 gets converted, since it
    // now encodes the result as UTF-8 so it can write replacement characters.
    // The expected value is four U+FFFD REPLACEMENT CHARACTERs, spelled as an
    // escape so the bytes survive any re-encoding of this file.
    try std.testing.expectEqualSlices(u8, "\u{FFFD}" ** 4, try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{ .output_code_page = .utf8 },
    ));
    try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
        .{ .output_code_page = .utf8 },
    ));
}
|
||||
|
||||
test "parse quoted wide string" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 'h', 'e', 'l', 'l', 'o' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"hello"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex with 0 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\x"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex max of 4 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0xFFFF, 'f' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\XfFfFf"
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal max of 7 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0x9493, '3', '3' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\111222333"
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal overflow
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0xFF01}, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\777401"
        ,
        .code_page = .windows1252,
    }, .{}));
    // literal tab characters get converted to spaces (dependent on source file columns)
    // `L"` occupies columns 0-1, so the tab sits at column 9 and pads with 7
    // spaces up to column 16.
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg" ++ (" " ** 7)), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
        .{},
    ));
    // Windows-1252 conversion
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
        .{},
    ));
    // Invalid escape sequences are skipped
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
        .{},
    ));
}
|
||||
|
||||
test "parse quoted wide string with utf8 code page" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\"", .code_page = .utf8 },
        .{},
    ));
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
        .{},
    ));
    // Invalid UTF-8 gets converted to U+FFFD (REPLACEMENT CHARACTER) depending
    // on well-formedness. The expected value is spelled as an escape so the
    // bytes survive any re-encoding of this file.
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\u{FFFD}" ** 4), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{},
    ));
}
|
||||
|
||||
test "parse quoted ascii string as wide string" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    // Parses `source` to UTF-16 with default options and compares the result
    // against `expected`.
    const check = struct {
        fn run(allocator: std.mem.Allocator, expected: [:0]const u16, source: SourceBytes) !void {
            const actual = try parseQuotedStringAsWideString(allocator, source, .{});
            try std.testing.expectEqualSentinel(u16, 0, expected, actual);
        }
    }.run;
    const utf16 = std.unicode.utf8ToUtf16LeStringLiteral;

    try check(arena, utf16("кириллица"), .{ .slice = "\"кириллица\"", .code_page = .utf8 });

    // Whether or not invalid escapes are skipped is still determined by the L prefix
    try check(arena, utf16("\\H"), .{ .slice = "\"\\H\"", .code_page = .windows1252 });
    try check(arena, utf16(""), .{ .slice = "L\"\\H\"", .code_page = .windows1252 });

    // Maximum escape sequence value is also determined by the L prefix
    try check(arena, utf16("\x1234"), .{ .slice = "\"\\x1234\"", .code_page = .windows1252 });
    try check(arena, &[_:0]u16{0x1234}, .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 });
}
|
||||
|
||||
/// Returns how many columns a tab at `column` advances to reach the next tab
/// stop, with tab stops every `tab_columns` columns. A column that is already
/// on a tab stop advances by a full `tab_columns`.
///
/// For `tab_columns` = 8:
///   0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4, 5 => 3, 6 => 2, 7 => 1, 8 => 8
pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
    const columns_past_stop = column % tab_columns;
    return tab_columns - columns_past_stop;
}
|
||||
|
||||
/// A parsed number literal: a 32-bit value plus whether it is marked long.
pub const Number = struct {
    value: u32,
    is_long: bool = false,

    /// Returns the low 16 bits of `value`.
    pub fn asWord(self: Number) u16 {
        return @intCast(self.value & 0xFFFF);
    }

    /// Applies the binary operator `operator_char` (one of `-`, `+`, `|`, `&`)
    /// to `lhs` and `rhs`. Arithmetic wraps on overflow, and the result is
    /// long if either operand is long.
    pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
        const a = lhs.value;
        const b = rhs.value;
        return .{
            .value = switch (operator_char) {
                '-' => a -% b,
                '+' => a +% b,
                '|' => a | b,
                '&' => a & b,
                else => unreachable, // invalid operator, this would be a lexer/parser bug
            },
            .is_long = lhs.is_long or rhs.is_long,
        };
    }
};
|
||||
|
||||
/// Parses a number literal, wrapping on overflow instead of failing.
///
/// Accepted form: an optional `-` (negate) or `~` (bitwise complement), an
/// optional radix prefix (`0x`/`0X` for hex, lowercase `0o` for octal; only
/// honored when at least one more byte follows it), then digits. Parsing
/// stops without error at the first codepoint that is not a digit of the
/// radix; if that codepoint is `L` or `l`, the number is marked long.
///
/// Assumes that number literals normally rejected by RC's preprocessor
/// are similarly rejected before being parsed.
///
/// Relevant RC preprocessor errors:
///  RC2021: expected exponent value, not '<digit>'
///   example that is rejected: 1e1
///   example that is accepted: 1ea
///   (this function will parse the two examples above the same)
pub fn parseNumberLiteral(bytes: SourceBytes) Number {
    std.debug.assert(bytes.slice.len > 0);

    const Sign = enum { none, minus, complement };
    const sign: Sign = switch (bytes.slice[0]) {
        '-' => .minus,
        '~' => .complement,
        else => .none,
    };
    var digits = if (sign == .none) bytes.slice else bytes.slice[1..];

    var radix: u8 = 10;
    if (digits.len > 2 and digits[0] == '0') {
        const prefixed_radix: ?u8 = switch (digits[1]) {
            'o' => 8, // octal radix prefix is case-sensitive
            'x', 'X' => 16,
            else => null,
        };
        if (prefixed_radix) |r| {
            radix = r;
            digits = digits[2..];
        }
    }

    var magnitude: u32 = 0;
    var is_long = false;
    var offset: usize = 0;
    while (bytes.code_page.codepointAt(offset, digits)) |cp| : (offset += cp.byte_len) {
        const c = cp.value;
        if (c == 'L' or c == 'l') {
            is_long = true;
            break;
        }
        // On invalid digit for the radix, just stop parsing but don't fail
        if (c > 0x7F) break;
        const digit = std.fmt.charToDigit(@intCast(c), radix) catch break;
        magnitude = magnitude *% radix +% digit;
    }

    return .{
        .value = switch (sign) {
            .none => magnitude,
            .minus => 0 -% magnitude,
            .complement => ~magnitude,
        },
        .is_long = is_long,
    };
}
|
||||
|
||||
test "parse number literal" {
    // Each case is parsed with the Windows-1252 code page and compared against
    // the full expected `Number` (value and long-suffix flag).
    const Case = struct { input: []const u8, expected: Number };
    const cases = [_]Case{
        // decimal, including the `L`/`l` suffix and wrapping past 32 bits
        .{ .input = "0", .expected = .{ .value = 0, .is_long = false } },
        .{ .input = "1", .expected = .{ .value = 1, .is_long = false } },
        .{ .input = "1L", .expected = .{ .value = 1, .is_long = true } },
        .{ .input = "1l", .expected = .{ .value = 1, .is_long = true } },
        .{ .input = "1garbageL", .expected = .{ .value = 1, .is_long = false } },
        .{ .input = "4294967295", .expected = .{ .value = 4294967295, .is_long = false } },
        .{ .input = "4294967296", .expected = .{ .value = 0, .is_long = false } },
        .{ .input = "4294967297L", .expected = .{ .value = 1, .is_long = true } },

        // hexadecimal
        .{ .input = "0x20", .expected = .{ .value = 0x20, .is_long = false } },
        .{ .input = "0x2AL", .expected = .{ .value = 0x2A, .is_long = true } },
        .{ .input = "0x2aL", .expected = .{ .value = 0x2A, .is_long = true } },
        .{ .input = "0x2aL", .expected = .{ .value = 0x2A, .is_long = true } },

        // octal; parsing stops at the first digit that is invalid for the radix
        .{ .input = "0o20", .expected = .{ .value = 0o20, .is_long = false } },
        .{ .input = "0o20L", .expected = .{ .value = 0o20, .is_long = true } },
        .{ .input = "0o29", .expected = .{ .value = 0o2, .is_long = false } },
        .{ .input = "0O29", .expected = .{ .value = 0, .is_long = false } },

        // unary minus / complement prefixes
        .{ .input = "-1", .expected = .{ .value = 0xFFFFFFFF, .is_long = false } },
        .{ .input = "~1", .expected = .{ .value = 0xFFFFFFFE, .is_long = false } },
        .{ .input = "-4294967297L", .expected = .{ .value = 0xFFFFFFFF, .is_long = true } },
        .{ .input = "~4294967297L", .expected = .{ .value = 0xFFFFFFFE, .is_long = true } },
        .{ .input = "-0X3", .expected = .{ .value = 0xFFFFFFFD, .is_long = false } },

        // anything after L is ignored
        .{ .input = "0x2aL5", .expected = .{ .value = 0x2A, .is_long = true } },
    };
    for (cases) |case| {
        try std.testing.expectEqual(
            case.expected,
            parseNumberLiteral(.{ .slice = case.input, .code_page = .windows1252 }),
        );
    }

    // can handle any length of number, wraps on overflow appropriately
    const big_overflow = parseNumberLiteral(.{
        .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001",
        .code_page = .windows1252,
    });
    try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
    try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());
}
|
||||
1880
src/resinator/parse.zig
Normal file
1880
src/resinator/parse.zig
Normal file
File diff suppressed because it is too large
Load Diff
407
src/resinator/rc.zig
Normal file
407
src/resinator/rc.zig
Normal file
@ -0,0 +1,407 @@
|
||||
const std = @import("std");
|
||||
const utils = @import("utils.zig");
|
||||
const res = @import("res.zig");
|
||||
const SourceBytes = @import("literals.zig").SourceBytes;
|
||||
|
||||
// https://learn.microsoft.com/en-us/windows/win32/menurc/about-resource-files
|
||||
|
||||
/// Resource types distinguished by the compiler, whether they were written as
/// a keyword (`ICON`) or as a predefined numeric type ID.
pub const Resource = enum {
    accelerators,
    bitmap,
    cursor,
    dialog,
    dialogex,
    /// As far as I can tell, this is undocumented; the most I could find was this:
    /// https://www.betaarchive.com/wiki/index.php/Microsoft_KB_Archive/91697
    dlginclude,
    /// Undocumented, basically works exactly like RCDATA
    dlginit,
    font,
    html,
    icon,
    menu,
    menuex,
    messagetable,
    plugplay, // Obsolete
    rcdata,
    stringtable,
    /// Undocumented
    toolbar,
    user_defined,
    versioninfo,
    vxd, // Obsolete

    // Types that are treated as a user-defined type when encountered, but have
    // special meaning without the Visual Studio GUI. We match the Win32 RC compiler
    // behavior by acting as if these keywords don't exist when compiling the .rc
    // (thereby treating them as user-defined).
    //textinclude, // A special resource that is interpreted by Visual C++.
    //typelib, // A special resource that is used with the /TLBID and /TLBOUT linker options

    // Types that can only be specified by numbers, they don't have keywords
    cursor_num,
    icon_num,
    string_num,
    anicursor_num,
    aniicon_num,
    fontdir_num,
    manifest_num,

    /// Keyword -> resource type lookup; keys are matched ASCII case-insensitively.
    const map = std.ComptimeStringMapWithEql(Resource, .{
        .{ "ACCELERATORS", .accelerators },
        .{ "BITMAP", .bitmap },
        .{ "CURSOR", .cursor },
        .{ "DIALOG", .dialog },
        .{ "DIALOGEX", .dialogex },
        .{ "DLGINCLUDE", .dlginclude },
        .{ "DLGINIT", .dlginit },
        .{ "FONT", .font },
        .{ "HTML", .html },
        .{ "ICON", .icon },
        .{ "MENU", .menu },
        .{ "MENUEX", .menuex },
        .{ "MESSAGETABLE", .messagetable },
        .{ "PLUGPLAY", .plugplay },
        .{ "RCDATA", .rcdata },
        .{ "STRINGTABLE", .stringtable },
        .{ "TOOLBAR", .toolbar },
        .{ "VERSIONINFO", .versioninfo },
        .{ "VXD", .vxd },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Resolves the type token of a resource definition.
    /// If the token parses as an ordinal, values >= 256 are user-defined and
    /// smaller values are translated through `fromRT`. Otherwise the token is
    /// looked up as a keyword, falling back to `.user_defined`.
    pub fn fromString(bytes: SourceBytes) Resource {
        if (res.NameOrOrdinal.maybeOrdinalFromString(bytes)) |ordinal| {
            return if (ordinal.ordinal >= 256)
                .user_defined
            else
                fromRT(@enumFromInt(ordinal.ordinal));
        }
        return map.get(bytes.slice) orelse .user_defined;
    }

    // TODO: Some comptime validation that RT <-> Resource conversion is synced?
    /// Translates a predefined `res.RT` type ID into its `Resource`; IDs with
    /// no named `RT` tag map to `.user_defined`.
    pub fn fromRT(rt: res.RT) Resource {
        return switch (rt) {
            .ACCELERATOR => .accelerators,
            .ANICURSOR => .anicursor_num,
            .ANIICON => .aniicon_num,
            .BITMAP => .bitmap,
            .CURSOR => .cursor_num,
            .DIALOG => .dialog,
            .DLGINCLUDE => .dlginclude,
            .DLGINIT => .dlginit,
            .FONT => .font,
            .FONTDIR => .fontdir_num,
            .GROUP_CURSOR => .cursor,
            .GROUP_ICON => .icon,
            .HTML => .html,
            .ICON => .icon_num,
            .MANIFEST => .manifest_num,
            .MENU => .menu,
            .MESSAGETABLE => .messagetable,
            .PLUGPLAY => .plugplay,
            .RCDATA => .rcdata,
            .STRING => .string_num,
            .TOOLBAR => .toolbar,
            .VERSION => .versioninfo,
            .VXD => .vxd,
            _ => .user_defined,
        };
    }

    /// Returns true for the resource types listed below as accepting raw data.
    pub fn canUseRawData(resource: Resource) bool {
        return switch (resource) {
            .user_defined,
            .html,
            .plugplay, // Obsolete
            .rcdata,
            .vxd, // Obsolete
            .manifest_num,
            .dlginit,
            => true,
            else => false,
        };
    }

    /// Human-readable name for diagnostics. Keyword types use their tag name;
    /// number-only types are shown as "<id> (<name>)".
    pub fn nameForErrorDisplay(resource: Resource) []const u8 {
        return switch (resource) {
            // zig fmt: off
            .accelerators, .bitmap, .cursor, .dialog, .dialogex, .dlginclude, .dlginit, .font,
            .html, .icon, .menu, .menuex, .messagetable, .plugplay, .rcdata, .stringtable,
            .toolbar, .versioninfo, .vxd => @tagName(resource),
            // zig fmt: on
            .user_defined => "user-defined",
            .cursor_num => std.fmt.comptimePrint("{d} (cursor)", .{@intFromEnum(res.RT.CURSOR)}),
            .icon_num => std.fmt.comptimePrint("{d} (icon)", .{@intFromEnum(res.RT.ICON)}),
            .string_num => std.fmt.comptimePrint("{d} (string)", .{@intFromEnum(res.RT.STRING)}),
            .anicursor_num => std.fmt.comptimePrint("{d} (anicursor)", .{@intFromEnum(res.RT.ANICURSOR)}),
            .aniicon_num => std.fmt.comptimePrint("{d} (aniicon)", .{@intFromEnum(res.RT.ANIICON)}),
            .fontdir_num => std.fmt.comptimePrint("{d} (fontdir)", .{@intFromEnum(res.RT.FONTDIR)}),
            .manifest_num => std.fmt.comptimePrint("{d} (manifest)", .{@intFromEnum(res.RT.MANIFEST)}),
        };
    }
};
|
||||
|
||||
/// Optional statements that may follow a resource header.
/// https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialog-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialogex-resource#parameters
pub const OptionalStatements = enum {
    characteristics,
    language,
    version,

    // DIALOG
    caption,
    class,
    exstyle,
    font,
    menu,
    style,

    /// Keywords in the general lookup table (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(OptionalStatements, .{
        .{ "CHARACTERISTICS", .characteristics },
        .{ "LANGUAGE", .language },
        .{ "VERSION", .version },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Keywords in the dialog-specific lookup table (ASCII case-insensitive).
    pub const dialog_map = std.ComptimeStringMapWithEql(OptionalStatements, .{
        .{ "CAPTION", .caption },
        .{ "CLASS", .class },
        .{ "EXSTYLE", .exstyle },
        .{ "FONT", .font },
        .{ "MENU", .menu },
        .{ "STYLE", .style },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// Control statement keywords, as listed in `map` below.
pub const Control = enum {
    auto3state,
    autocheckbox,
    autoradiobutton,
    checkbox,
    combobox,
    control,
    ctext,
    defpushbutton,
    edittext,
    hedit,
    iedit,
    groupbox,
    icon,
    listbox,
    ltext,
    pushbox,
    pushbutton,
    radiobutton,
    rtext,
    scrollbar,
    state3,
    userbutton,

    /// Keyword -> control lookup; keys are matched ASCII case-insensitively.
    pub const map = std.ComptimeStringMapWithEql(Control, .{
        .{ "AUTO3STATE", .auto3state },
        .{ "AUTOCHECKBOX", .autocheckbox },
        .{ "AUTORADIOBUTTON", .autoradiobutton },
        .{ "CHECKBOX", .checkbox },
        .{ "COMBOBOX", .combobox },
        .{ "CONTROL", .control },
        .{ "CTEXT", .ctext },
        .{ "DEFPUSHBUTTON", .defpushbutton },
        .{ "EDITTEXT", .edittext },
        .{ "HEDIT", .hedit },
        .{ "IEDIT", .iedit },
        .{ "GROUPBOX", .groupbox },
        .{ "ICON", .icon },
        .{ "LISTBOX", .listbox },
        .{ "LTEXT", .ltext },
        .{ "PUSHBOX", .pushbox },
        .{ "PUSHBUTTON", .pushbutton },
        .{ "RADIOBUTTON", .radiobutton },
        .{ "RTEXT", .rtext },
        .{ "SCROLLBAR", .scrollbar },
        .{ "STATE3", .state3 },
        .{ "USERBUTTON", .userbutton },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Returns false for the six control kinds listed in the switch below
    /// and true for every other control.
    pub fn hasTextParam(control: Control) bool {
        return switch (control) {
            .scrollbar, .listbox, .iedit, .hedit, .edittext, .combobox => false,
            else => true,
        };
    }
};
|
||||
|
||||
/// Namespace for resolving control class names to `res.ControlClass`.
pub const ControlClass = struct {
    /// Class name -> class lookup for UTF-8 input (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(res.ControlClass, .{
        .{ "BUTTON", .button },
        .{ "EDIT", .edit },
        .{ "STATIC", .static },
        .{ "LISTBOX", .listbox },
        .{ "SCROLLBAR", .scrollbar },
        .{ "COMBOBOX", .combobox },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Like `map.get` but works on WTF16 strings, for use with parsed
    /// string literals ("BUTTON", or even "\x42UTTON").
    /// Returns null when `str` matches none of the predefined class names.
    pub fn fromWideString(str: []const u16) ?res.ControlClass {
        const utf16Literal = std.unicode.utf8ToUtf16LeStringLiteral;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("BUTTON"))) return .button;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("EDIT"))) return .edit;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("STATIC"))) return .static;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("LISTBOX"))) return .listbox;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("SCROLLBAR"))) return .scrollbar;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("COMBOBOX"))) return .combobox;
        return null;
    }
};
|
||||
|
||||
/// Helpers for ASCII-aware comparison of 16-bit code unit strings.
const ascii = struct {
    /// Compares ASCII values case-insensitively, non-ASCII values are compared directly.
    ///
    /// Returns false when the lengths differ. Case folding is only applied when
    /// *both* code units are ASCII; narrowing a code unit above 0xFF to `u8`
    /// with `@intCast` would otherwise be a safety-checked panic (and undefined
    /// behavior in ReleaseFast).
    pub fn eqlIgnoreCaseW(a: []const u16, b: []const u16) bool {
        if (a.len != b.len) return false;
        for (a, b) |a_c, b_c| {
            if (a_c < 128 and b_c < 128) {
                if (std.ascii.toLower(@intCast(a_c)) != std.ascii.toLower(@intCast(b_c))) return false;
            } else if (a_c != b_c) {
                // At least one side is non-ASCII, so only an exact match counts.
                return false;
            }
        }
        return true;
    }
};
|
||||
|
||||
/// Menu item statement keywords, plus the per-item option keywords.
pub const MenuItem = enum {
    menuitem,
    popup,

    /// Keyword -> item kind lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(MenuItem, .{
        .{ "MENUITEM", .menuitem },
        .{ "POPUP", .popup },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Returns true if `bytes` is the word SEPARATOR in any ASCII letter case.
    pub fn isSeparator(bytes: []const u8) bool {
        return std.ascii.eqlIgnoreCase(bytes, "SEPARATOR");
    }

    /// Option keywords, as listed in `map` below.
    pub const Option = enum {
        checked,
        grayed,
        help,
        inactive,
        menubarbreak,
        menubreak,

        /// Keyword -> option lookup (ASCII case-insensitive).
        pub const map = std.ComptimeStringMapWithEql(Option, .{
            .{ "CHECKED", .checked },
            .{ "GRAYED", .grayed },
            .{ "HELP", .help },
            .{ "INACTIVE", .inactive },
            .{ "MENUBARBREAK", .menubarbreak },
            .{ "MENUBREAK", .menubreak },
        }, std.comptime_string_map.eqlAsciiIgnoreCase);
    };
};
|
||||
|
||||
/// The two toolbar entry keywords, BUTTON and SEPARATOR.
pub const ToolbarButton = enum {
    button,
    separator,

    /// Keyword -> entry kind lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(ToolbarButton, .{
        .{ "BUTTON", .button },
        .{ "SEPARATOR", .separator },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// Fixed-info statement keywords (FILEVERSION, PRODUCTVERSION, ...), as listed in `map`.
pub const VersionInfo = enum {
    file_version,
    product_version,
    file_flags_mask,
    file_flags,
    file_os,
    file_type,
    file_subtype,

    /// Keyword -> statement lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(VersionInfo, .{
        .{ "FILEVERSION", .file_version },
        .{ "PRODUCTVERSION", .product_version },
        .{ "FILEFLAGSMASK", .file_flags_mask },
        .{ "FILEFLAGS", .file_flags },
        .{ "FILEOS", .file_os },
        .{ "FILETYPE", .file_type },
        .{ "FILESUBTYPE", .file_subtype },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// The BLOCK and VALUE keywords.
pub const VersionBlock = enum {
    block,
    value,

    /// Keyword -> statement lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(VersionBlock, .{
        .{ "BLOCK", .block },
        .{ "VALUE", .value },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// Keywords that can be the first token in a statement and (if so) dictate how the rest
/// of the statement is parsed.
pub const TopLevelKeywords = enum {
    language,
    version,
    characteristics,
    stringtable,

    /// Keyword -> statement lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(TopLevelKeywords, .{
        .{ "LANGUAGE", .language },
        .{ "VERSION", .version },
        .{ "CHARACTERISTICS", .characteristics },
        .{ "STRINGTABLE", .stringtable },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// Resource attribute keywords (PRELOAD, MOVEABLE, ...), as listed in `map`.
pub const CommonResourceAttributes = enum {
    preload,
    loadoncall,
    fixed,
    moveable,
    discardable,
    pure,
    impure,
    shared,
    nonshared,

    /// Keyword -> attribute lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(CommonResourceAttributes, .{
        .{ "PRELOAD", .preload },
        .{ "LOADONCALL", .loadoncall },
        .{ "FIXED", .fixed },
        .{ "MOVEABLE", .moveable },
        .{ "DISCARDABLE", .discardable },
        .{ "PURE", .pure },
        .{ "IMPURE", .impure },
        .{ "SHARED", .shared },
        .{ "NONSHARED", .nonshared },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
|
||||
/// Accelerator type and option keywords (VIRTKEY, ASCII, ...), as listed in `map`.
pub const AcceleratorTypeAndOptions = enum {
    virtkey,
    ascii,
    noinvert,
    alt,
    shift,
    control,

    /// Keyword -> type/option lookup (ASCII case-insensitive).
    pub const map = std.ComptimeStringMapWithEql(AcceleratorTypeAndOptions, .{
        .{ "VIRTKEY", .virtkey },
        .{ "ASCII", .ascii },
        .{ "NOINVERT", .noinvert },
        .{ "ALT", .alt },
        .{ "SHIFT", .shift },
        .{ "CONTROL", .control },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);
};
|
||||
1108
src/resinator/res.zig
Normal file
1108
src/resinator/res.zig
Normal file
File diff suppressed because it is too large
Load Diff
684
src/resinator/source_mapping.zig
Normal file
684
src/resinator/source_mapping.zig
Normal file
@ -0,0 +1,684 @@
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
|
||||
const parseQuotedAsciiString = @import("literals.zig").parseQuotedAsciiString;
|
||||
const lex = @import("lex.zig");
|
||||
|
||||
/// Output of `parseAndRemoveLineCommands`.
pub const ParseLineCommandsResult = struct {
    /// The source text with `#line` commands (and ignored file contents) removed.
    result: []u8,
    /// Maps each line of `result` to a line in an original file.
    mappings: SourceMappings,
};
|
||||
|
||||
/// Tracks the original file and line that the next output line originates from.
const CurrentMapping = struct {
    /// Line number within `filename`; reset by `#line` and incremented per recorded line.
    line_num: usize = 1,
    /// Name of the file currently being mapped.
    filename: std.ArrayListUnmanaged(u8) = .{},
    /// Set when a `#line` command was applied, cleared once a line end is recorded.
    pending: bool = true,
    /// True while inside a `.c`/`.h` file, whose non-preprocessor contents are dropped.
    ignore_contents: bool = false,
};
|
||||
|
||||
/// Options accepted by `parseAndRemoveLineCommands`.
pub const ParseAndRemoveLineCommandsOptions = struct {
    /// When non-null, used as the current filename until the first `#line`
    /// command and registered as the root file of the resulting mappings.
    initial_filename: ?[]const u8 = null,
};
|
||||
|
||||
/// Parses and removes #line commands as well as all source code that is within a file
/// with .c or .h extensions.
///
/// > RC treats files with the .c and .h extensions in a special manner. It
/// > assumes that a file with one of these extensions does not contain
/// > resources. If a file has the .c or .h file name extension, RC ignores all
/// > lines in the file except the preprocessor directives. Therefore, to
/// > include a file that contains resources in another resource script, give
/// > the file to be included an extension other than .c or .h.
/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
///
/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
/// between the lines and their corresponding lines in their original files.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// If `options.initial_filename` is provided, that filename is guaranteed to be
/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
///
/// On error the partially built mappings are freed before returning; on
/// success the caller owns `mappings`.
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    var parse_result = ParseLineCommandsResult{
        .result = undefined,
        .mappings = .{},
    };
    errdefer parse_result.mappings.deinit(allocator);

    var current_mapping: CurrentMapping = .{};
    defer current_mapping.filename.deinit(allocator);

    if (options.initial_filename) |initial_filename| {
        try current_mapping.filename.appendSlice(allocator, initial_filename);
        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
    }

    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    const State = enum {
        /// Only whitespace (if anything) has been seen on the current line.
        line_start,
        /// The first non-whitespace character of the line was '#'.
        preprocessor,
        /// The line holds regular (non-directive) content.
        non_preprocessor,
    };
    var state: State = .line_start;
    var index: usize = 0;
    // Start of the not-yet-written leading part of the current line; it is
    // only written once we know the line is kept.
    var pending_start: ?usize = null;
    var preprocessor_start: usize = 0;
    // Line number within the output (post-processed) text.
    var line_number: usize = 1;
    while (index < source.len) : (index += 1) {
        const c = source[index];
        switch (state) {
            .line_start => switch (c) {
                '#' => {
                    preprocessor_start = index;
                    state = .preprocessor;
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    pending_start = null;
                },
                ' ', '\t', '\x0b', '\x0c' => {
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                else => {
                    state = .non_preprocessor;
                    if (pending_start != null) {
                        // Flush the buffered leading whitespace together with `c`.
                        if (!current_mapping.ignore_contents) {
                            result.writeSlice(source[pending_start.? .. index + 1]);
                        }
                        pending_start = null;
                        continue;
                    }
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .preprocessor => switch (c) {
                '\r', '\n' => {
                    // Now that we have the full line we can decide what to do with it
                    const preprocessor_str = source[preprocessor_start..index];
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                        // #line commands update the mapping and are dropped from the output.
                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                    } else {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                        if (!current_mapping.ignore_contents) {
                            const line_ending_len: usize = if (is_crlf) 2 else 1;
                            result.writeSlice(source[pending_start.? .. index + line_ending_len]);
                            line_number += 1;
                        }
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {},
            },
            .non_preprocessor => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
        }
    } else {
        // End of input: finish whatever line was in progress.
        switch (state) {
            .line_start => {},
            .non_preprocessor => {
                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
            },
            .preprocessor => {
                // Now that we have the full line we can decide what to do with it
                const preprocessor_str = source[preprocessor_start..index];
                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                } else {
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start.?..index]);
                    }
                }
            },
        }
    }

    parse_result.result = result.getWritten();

    // Remove whitespace from the end of the result. This avoids issues when the
    // preprocessor adds a newline to the end of the file, since then the
    // post-preprocessed source could have more lines than the corresponding input source and
    // the inserted line can't be mapped to any lines in the original file.
    // There's no way that whitespace at the end of a file can affect the parsing
    // of the RC script so this is okay to do unconditionally.
    // TODO: There might be a better way around this
    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
        parse_result.result.len -= 1;
    }

    // If there have been no line mappings at all, then we're dealing with an empty file.
    // In this case, we want to fake a line mapping just so that we return something
    // that is useable in the same way that a non-empty mapping would be.
    if (parse_result.mappings.mapping.items.len == 0) {
        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
    }

    return parse_result;
}
|
||||
|
||||
/// Returns true when `line_ending` and the byte at `source[next_index]` together
/// form a two-byte line ending (`\r\n` or `\n\r`). Returns false when
/// `next_index` is out of bounds.
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    if (next_index >= source.len) return false;

    return switch (source[next_index]) {
        // can't be \n\n or \r\r
        '\r', '\n' => |next_ending| next_ending != line_ending,
        else => false,
    };
}
|
||||
|
||||
/// Records that output line `post_processed_line_number` originates from the
/// current file/line of `current_mapping`, then advances `current_mapping` to
/// the next original line and clears its `pending` flag.
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
    const original_line = current_mapping.line_num;
    const offset = try mapping.files.put(allocator, current_mapping.filename.items);

    try mapping.set(allocator, post_processed_line_number, .{
        .start_line = original_line,
        .end_line = original_line,
        .filename_offset = offset,
    });

    current_mapping.line_num = original_line + 1;
    current_mapping.pending = false;
}
|
||||
|
||||
// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
/// Applies a `#line <number> "<filename>"` command to `current_mapping`.
///
/// Malformed commands (missing or non-numeric line number, filename that is not
/// enclosed in double quotes, or an invalid escape sequence in the filename) are
/// ignored and leave `current_mapping` untouched. Only allocation failure is
/// reported as an error.
///
/// After a valid command, contents are ignored if the filename ends in `.c` or
/// `.h` (ASCII case-insensitive).
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
    // TODO: Are there other whitespace characters that should be included?
    var tokens = std.mem.tokenize(u8, line_command, " \t");
    const directive = tokens.next() orelse return; // #line
    if (!std.mem.eql(u8, directive, "#line")) return;

    const linenum_str = tokens.next() orelse return;
    const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return;

    // Everything after the line number is the filename literal, minus trailing whitespace.
    const literal = std.mem.trimRight(u8, tokens.rest(), &std.ascii.whitespace);
    if (literal.len < 2) return;
    if (literal[0] != '"' or literal[literal.len - 1] != '"') return;

    const filename = parseFilename(allocator, literal[1 .. literal.len - 1]) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        else => return,
    };
    defer allocator.free(filename);

    current_mapping.line_num = linenum;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, filename);
    current_mapping.pending = true;
    current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
}
|
||||
|
||||
/// Allocating variant of `parseAndRemoveLineCommands`: the output is written to
/// a freshly allocated buffer that is then resized to the length of the result.
///
/// On success the caller owns both `result` (free with `allocator`) and
/// `mappings`. On failure everything allocated here is released.
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // From here on the mappings are owned by this function until it returns;
    // without this, a failing realloc below would leak them.
    errdefer result.mappings.deinit(allocator);
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}
|
||||
|
||||
/// C-style string parsing with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escape are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
///   may be unescaped double quotes within the str) and will return error.InvalidString
///   on any problems found.
///
/// The result is a UTF-8 encoded string. Caller owns the returned slice.
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
    const State = enum { string, escape, escape_hex, escape_octal, escape_u };

    // Capacity is reserved up front for `str.len` bytes and all appends below
    // are unchecked.
    var filename = try std.ArrayList(u8).initCapacity(allocator, str.len);
    errdefer filename.deinit();

    var state: State = .string;
    var index: usize = 0;
    var escape_len: usize = undefined;
    var escape_val: u64 = undefined;
    var escape_expected_len: u8 = undefined;
    while (index < str.len) : (index += 1) {
        const c = str[index];
        switch (state) {
            .string => switch (c) {
                '\\' => state = .escape,
                '"' => return error.InvalidString,
                else => filename.appendAssumeCapacity(c),
            },
            .escape => {
                // Single-character escapes resolve immediately.
                const simple_escape: ?u8 = switch (c) {
                    '\'', '"', '\\', '?' => c,
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'a' => '\x07',
                    'b' => '\x08',
                    'e' => '\x1b', // non-standard
                    'f' => '\x0c',
                    'v' => '\x0b',
                    else => null,
                };
                if (simple_escape) |escaped_c| {
                    filename.appendAssumeCapacity(escaped_c);
                    state = .string;
                    continue;
                }
                // Otherwise this starts a numeric escape (or is invalid).
                switch (c) {
                    'x' => {
                        escape_val = 0;
                        escape_len = 0;
                        state = .escape_hex;
                    },
                    '0'...'7' => {
                        escape_val = std.fmt.charToDigit(c, 8) catch unreachable;
                        escape_len = 1;
                        state = .escape_octal;
                    },
                    'u', 'U' => {
                        escape_val = 0;
                        escape_len = 0;
                        state = .escape_u;
                        escape_expected_len = if (c == 'u') 4 else 8;
                    },
                    else => return error.InvalidString,
                }
            },
            .escape_hex => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    // Overflowing a u8 makes the whole string invalid.
                    const shifted = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString;
                    escape_val = std.math.add(u8, shifted, digit) catch return error.InvalidString;
                    escape_len += 1;
                },
                else => {
                    if (escape_len == 0) return error.InvalidString;
                    filename.appendAssumeCapacity(@intCast(escape_val));
                    state = .string;
                    index -= 1; // reconsume
                },
            },
            .escape_octal => switch (c) {
                '0'...'7' => {
                    const digit = std.fmt.charToDigit(c, 8) catch unreachable;
                    const shifted = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString;
                    escape_val = std.math.add(u8, shifted, digit) catch return error.InvalidString;
                    escape_len += 1;
                    // Octal escapes are at most three digits long.
                    if (escape_len == 3) {
                        filename.appendAssumeCapacity(@intCast(escape_val));
                        state = .string;
                    }
                },
                else => {
                    if (escape_len == 0) return error.InvalidString;
                    filename.appendAssumeCapacity(@intCast(escape_val));
                    state = .string;
                    index -= 1; // reconsume
                },
            },
            .escape_u => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    const shifted = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString;
                    escape_val = std.math.add(u21, shifted, digit) catch return error.InvalidString;
                    escape_len += 1;
                    if (escape_len == escape_expected_len) {
                        var utf8_buf: [4]u8 = undefined;
                        const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &utf8_buf) catch return error.InvalidString;
                        filename.appendSliceAssumeCapacity(utf8_buf[0..utf8_len]);
                        state = .string;
                    }
                },
                // Requires escape_expected_len valid hex digits
                else => return error.InvalidString,
            },
        }
    } else {
        // End of input: flush or reject an escape that was still in progress.
        switch (state) {
            .string => {},
            .escape, .escape_u => return error.InvalidString,
            .escape_hex => {
                if (escape_len == 0) return error.InvalidString;
                filename.appendAssumeCapacity(@intCast(escape_val));
            },
            .escape_octal => filename.appendAssumeCapacity(@intCast(escape_val)),
        }
    }

    return filename.toOwnedSlice();
}
|
||||
|
||||
/// Test helper: runs `parseFilename` on `input` using the testing allocator and
/// checks that the parsed bytes equal `expected`. The parsed slice is freed
/// before returning.
fn testParseFilename(expected: []const u8, input: []const u8) !void {
    const gpa = std.testing.allocator;
    const actual = try parseFilename(gpa, input);
    defer gpa.free(actual);

    try std.testing.expectEqualSlices(u8, expected, actual);
}
|
||||
|
||||
test parseFilename {
    // Inputs that parse successfully, with the bytes they are expected to produce.
    try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
    try testParseFilename("\xABz\x53", "\\xABz\\123");
    try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");

    // Inputs that must be rejected with error.InvalidString.
    const invalid_inputs = [_][]const u8{
        "\"",
        "\\",
        "\\u",
        "\\U",
        "\\x",
        "\\xZZ",
        "\\xABCDEF",
        "\\777",
    };
    for (invalid_inputs) |input| {
        try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, input));
    }
}
|
||||
|
||||
/// Per-line table of source spans: the entry at index (line number - 1) holds the
/// span of lines and the filename offset (into `files`) recorded for that line.
pub const SourceMappings = struct {
    /// line number -> span where the index is (line number - 1)
    mapping: std.ArrayListUnmanaged(SourceSpan) = .{},
    /// Storage for the filenames referenced by `SourceSpan.filename_offset`.
    files: StringTable = .{},
    /// The default assumes that the first filename added is the root file.
    /// The value should be set to the correct offset if that assumption does not hold.
    root_filename_offset: u32 = 0,

    pub const SourceSpan = struct {
        start_line: usize,
        end_line: usize,
        /// Offset of the filename within `SourceMappings.files`.
        filename_offset: u32,
    };

    /// Frees the filename table and the line mapping.
    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
        self.files.deinit(allocator);
        self.mapping.deinit(allocator);
    }

    /// Stores `span` as the mapping for `line_num`, growing the mapping if needed.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void {
        const ptr = try self.expandAndGet(allocator, line_num);
        ptr.* = span;
    }

    /// Returns true if the mapping is long enough to contain `line_num`.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn has(self: *SourceMappings, line_num: usize) bool {
        return self.mapping.items.len >= line_num;
    }

    /// Note: `line_num` is 1-indexed
    pub fn get(self: SourceMappings, line_num: usize) SourceSpan {
        return self.mapping.items[line_num - 1];
    }

    /// Returns a pointer into the mapping's storage; it is only valid until the
    /// mapping is next resized.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan {
        return &self.mapping.items[line_num - 1];
    }

    /// Expands the number of lines in the mapping to include the requested
    /// line number (if necessary) and returns a pointer to the value at that
    /// line number. Entries created by the expansion are left undefined.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan {
        // Only ever grow: an unconditional `resize` would silently drop every
        // mapping after `line_num` when the line is already present.
        if (line_num > self.mapping.items.len) {
            try self.mapping.resize(allocator, line_num);
        }
        return &self.mapping.items[line_num - 1];
    }

    /// Merges the `num_following_lines_to_collapse` lines after `line_num` into
    /// `line_num`: its span's `end_line` becomes the `end_line` of the last
    /// collapsed line, and all later lines are shifted down to close the gap.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void {
        std.debug.assert(num_following_lines_to_collapse > 0);

        const span_to_collapse_into = self.getPtr(line_num);
        const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse);
        span_to_collapse_into.end_line = last_collapsed_span.end_line;

        // As a 0-based index, this is the first line that survives after the collapsed ones.
        const after_collapsed_start = line_num + num_following_lines_to_collapse;
        const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse;
        // Source and destination overlap and the destination starts first,
        // so the copy has to run front to back.
        std.mem.copyForwards(
            SourceSpan,
            self.mapping.items[line_num..new_num_lines],
            self.mapping.items[after_collapsed_start..],
        );

        self.mapping.items.len = new_num_lines;
    }

    /// Returns true if the line is from the main/root file (i.e. not a file that has been
    /// `#include`d).
    ///
    /// Note: `line_num` is 1-indexed
    pub fn isRootFile(self: *SourceMappings, line_num: usize) bool {
        return self.get(line_num).filename_offset == self.root_filename_offset;
    }
};
|
||||
|
||||
test "SourceMappings collapse" {
    const gpa = std.testing.allocator;

    var mappings: SourceMappings = .{};
    defer mappings.deinit(gpa);
    const offset = try mappings.files.put(gpa, "test.rc");

    // { start_line, end_line } for lines 1 through 4, in order.
    const line_ranges = [_][2]usize{ .{ 1, 1 }, .{ 2, 3 }, .{ 4, 4 }, .{ 5, 5 } };
    for (line_ranges, 1..) |range, line_num| {
        try mappings.set(gpa, line_num, .{
            .start_line = range[0],
            .end_line = range[1],
            .filename_offset = offset,
        });
    }

    // Fold lines 2 and 3 into line 1; the former line 4 becomes line 2.
    mappings.collapse(1, 2);

    const remaining = mappings.mapping.items;
    try std.testing.expectEqual(@as(usize, 2), remaining.len);
    try std.testing.expectEqual(@as(usize, 4), remaining[0].end_line);
    try std.testing.expectEqual(@as(usize, 5), remaining[1].end_line);
}
|
||||
|
||||
/// Same thing as StringTable in Zig's src/Wasm.zig
///
/// Stores each distinct string once in a single byte buffer, followed by a 0 byte,
/// and identifies it by the offset of its first byte within `data`.
pub const StringTable = struct {
    /// All stored strings back to back, each followed by a 0 byte.
    data: std.ArrayListUnmanaged(u8) = .{},
    /// Set of offsets into `data`, keyed through the string-index context/adapter.
    map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{},

    /// Frees the string bytes and the offset set.
    pub fn deinit(self: *StringTable, allocator: Allocator) void {
        self.data.deinit(allocator);
        self.map.deinit(allocator);
    }

    /// Adds `value` to the table and returns its offset. If an equal string is
    /// already present, its existing offset is returned and nothing is added.
    /// `value` is copied; it should not contain 0 bytes, since `get` reads up to
    /// the first 0 byte.
    pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
        const result = try self.map.getOrPutContextAdapted(
            allocator,
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
            .{ .bytes = &self.data },
        );
        if (result.found_existing) {
            return result.key_ptr.*;
        }
        // A new slot was inserted but its key is not initialized yet. If reserving
        // space for the bytes fails, remove the slot again so the map is never
        // left holding an undefined key.
        errdefer self.map.removeByPtr(result.key_ptr);

        try self.data.ensureUnusedCapacity(allocator, value.len + 1);
        const offset: u32 = @intCast(self.data.items.len);

        self.data.appendSliceAssumeCapacity(value);
        self.data.appendAssumeCapacity(0);

        result.key_ptr.* = offset;

        return offset;
    }

    /// Returns the string that starts at `offset`, up to (not including) its 0 byte.
    /// The slice points into `data` and is invalidated when `data` grows.
    pub fn get(self: StringTable, offset: u32) []const u8 {
        std.debug.assert(offset < self.data.items.len);
        return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
    }

    /// Returns the offset of `value` if it has been added, otherwise null.
    pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
        return self.map.getKeyAdapted(
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
        );
    }
};
|
||||
|
||||
/// Test-only description of one expected mapping entry. It mirrors
/// `SourceMappings.SourceSpan` but carries the filename itself rather than an
/// offset into the string table.
const ExpectedSourceSpan = struct {
    /// Expected `SourceSpan.start_line`.
    start_line: usize,
    /// Expected `SourceSpan.end_line`.
    end_line: usize,
    /// Expected filename that the span's `filename_offset` resolves to.
    filename: []const u8,
};
|
||||
|
||||
/// Test helper: runs `parseAndRemoveLineCommandsAlloc` on `source` and checks both
/// the resulting text against `expected` and the resulting mappings against
/// `expected_spans`. When the mappings differ, the expected and actual mappings
/// are printed via `std.debug.print` before the error is returned.
fn testParseAndRemoveLineCommands(
    expected: []const u8,
    comptime expected_spans: []const ExpectedSourceSpan,
    source: []const u8,
    options: ParseAndRemoveLineCommandsOptions,
) !void {
    const gpa = std.testing.allocator;

    var parsed = try parseAndRemoveLineCommandsAlloc(gpa, source, options);
    defer gpa.free(parsed.result);
    defer parsed.mappings.deinit(gpa);

    try std.testing.expectEqualStrings(expected, parsed.result);

    expectEqualMappings(expected_spans, parsed.mappings) catch |err| {
        // Print both sides, one mapping per line, numbered from 1.
        std.debug.print("\nexpected mappings:\n", .{});
        for (expected_spans, 1..) |want, line_num| {
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, want.filename, want.start_line, want.end_line });
        }
        std.debug.print("\nactual mappings:\n", .{});
        for (parsed.mappings.mapping.items, 1..) |got, line_num| {
            const got_filename = parsed.mappings.files.get(got.filename_offset);
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, got_filename, got.start_line, got.end_line });
        }
        std.debug.print("\n", .{});
        return err;
    };
}
|
||||
|
||||
/// Test helper: fails unless `mappings` has exactly as many entries as
/// `expected_spans` and every entry matches its expected start line, end line
/// and (resolved) filename, in order.
fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
    const actual_spans = mappings.mapping.items;
    try std.testing.expectEqual(expected_spans.len, actual_spans.len);

    // The lengths are known to be equal here, so the two slices can be walked in lockstep.
    for (expected_spans, actual_spans) |want, got| {
        try std.testing.expectEqual(want.start_line, got.start_line);
        try std.testing.expectEqual(want.end_line, got.end_line);
        try std.testing.expectEqualStrings(want.filename, mappings.files.get(got.filename_offset));
    }
}
|
||||
|
||||
test "basic" {
    // A lone #line command is removed entirely, leaving empty output with one mapping.
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };
    try testParseAndRemoveLineCommands("", &spans, "#line 1 \"blah.rc\"", .{});
}
|
||||
|
||||
test "only removes line commands" {
    // The #pragma line must survive; only the #line command is stripped.
    const input =
        \\#line 1 "blah.rc"
        \\#pragma code_page(65001)
    ;
    const output =
        \\#pragma code_page(65001)
    ;
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };
    try testParseAndRemoveLineCommands(output, &spans, input, .{});
}
|
||||
|
||||
test "whitespace and line endings" {
    // Tabs and spaces between the tokens and a trailing CRLF are accepted.
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    };
    try testParseAndRemoveLineCommands("", &spans, "#line \t 1 \t \"blah.rc\"\r\n", .{});
}
|
||||
|
||||
test "example" {
    // Input with many #line commands spread over several files.
    const input =
        \\#line 1 "rcdata.rc"
        \\#line 1 "<built-in>"
        \\#line 1 "<built-in>"
        \\#line 355 "<built-in>"
        \\#line 1 "<command line>"
        \\#line 1 "<built-in>"
        \\#line 1 "rcdata.rc"
        \\#line 1 "./header.h"
        \\
        \\
        \\2 RCDATA {"blah"}
        \\
        \\
        \\#line 1 "./included.rc"
        \\
        \\included RCDATA {"hello"}
        \\#line 7 "./header.h"
        \\#line 1 "rcdata.rc"
    ;
    const output =
        \\
        \\included RCDATA {"hello"}
    ;
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
    };
    try testParseAndRemoveLineCommands(output, &spans, input, .{});
}
|
||||
|
||||
test "CRLF and other line endings" {
    // The #line commands are terminated by "\r\n", "\r" and "\n\r" respectively.
    const input = "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n";
    const output = "hello\r\n#pragma code_page(65001)\r\nworld";
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
        .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
    };
    try testParseAndRemoveLineCommands(output, &spans, input, .{});
}
|
||||
|
||||
test "no line commands" {
    // Without any #line command the text passes through unchanged and every
    // line is attributed to `initial_filename`.
    const text =
        \\1 RCDATA {"blah"}
        \\2 RCDATA {"blah"}
    ;
    const spans = [_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
    };
    try testParseAndRemoveLineCommands(text, &spans, text, .{ .initial_filename = "blah.rc" });
}
|
||||
|
||||
test "in place" {
    const gpa = std.testing.allocator;

    // The same mutable buffer is passed as both the source and the output buffer.
    var mut_source = "#line 1 \"blah.rc\"".*;
    const shared: []u8 = &mut_source;

    var result = try parseAndRemoveLineCommands(gpa, shared, shared, .{});
    defer result.mappings.deinit(gpa);

    try std.testing.expectEqualStrings("", result.result);
}
|
||||
83
src/resinator/utils.zig
Normal file
83
src/resinator/utils.zig
Normal file
@ -0,0 +1,83 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Like std.io.FixedBufferStream but does no bounds checking
///
/// Writes go straight into `slice` at `pos`; none of the functions return an
/// error, so the caller must provide a slice that is large enough.
pub const UncheckedSliceWriter = struct {
    const Self = @This();

    /// Number of bytes written so far; also the index of the next write.
    pos: usize = 0,
    /// Destination buffer.
    slice: []u8,

    /// Stores `char` at the current position and advances by one.
    pub fn write(self: *Self, char: u8) void {
        const at = self.pos;
        self.slice[at] = char;
        self.pos = at + 1;
    }

    /// Writes every byte of `slice` in order, one at a time.
    pub fn writeSlice(self: *Self, slice: []const u8) void {
        // NOTE(review): deliberately a front-to-back byte copy rather than @memcpy;
        // `slice` may alias the destination when rewriting in place — confirm with callers.
        var i: usize = 0;
        while (i < slice.len) : (i += 1) {
            self.write(slice[i]);
        }
    }

    /// Returns the portion of the destination buffer that has been written.
    pub fn getWritten(self: Self) []u8 {
        return self.slice[0..self.pos];
    }
};
|
||||
|
||||
/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if
/// a directory is attempted to be opened.
/// On success the returned file is open and owned by the caller.
/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed.
pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File {
    const file = try cwd.openFile(path, flags);
    // The extra stat check is skipped on Windows (presumably openFile already
    // reports IsDir there — see https://github.com/ziglang/zig/issues/5732).
    if (builtin.os.tag == .windows) return file;

    // From here on any failure must not leak the handle that was just opened.
    errdefer file.close();
    const stat = try file.stat();
    if (stat.kind == .directory) return error.IsDir;
    return file;
}
|
||||
|
||||
/// Emulates the Windows implementation of `iswdigit`, but only returns true
/// for the non-ASCII digits that `iswdigit` on Windows would return true for.
pub fn isNonAsciiDigit(c: u21) bool {
    const Range = struct { first: u21, last: u21 };
    // Inclusive codepoint ranges that count as digits, in ascending order.
    const digit_ranges = [_]Range{
        .{ .first = '²', .last = '²' },
        .{ .first = '³', .last = '³' },
        .{ .first = '¹', .last = '¹' },
        .{ .first = '\u{660}', .last = '\u{669}' },
        .{ .first = '\u{6F0}', .last = '\u{6F9}' },
        .{ .first = '\u{7C0}', .last = '\u{7C9}' },
        .{ .first = '\u{966}', .last = '\u{96F}' },
        .{ .first = '\u{9E6}', .last = '\u{9EF}' },
        .{ .first = '\u{A66}', .last = '\u{A6F}' },
        .{ .first = '\u{AE6}', .last = '\u{AEF}' },
        .{ .first = '\u{B66}', .last = '\u{B6F}' },
        .{ .first = '\u{BE6}', .last = '\u{BEF}' },
        .{ .first = '\u{C66}', .last = '\u{C6F}' },
        .{ .first = '\u{CE6}', .last = '\u{CEF}' },
        .{ .first = '\u{D66}', .last = '\u{D6F}' },
        .{ .first = '\u{E50}', .last = '\u{E59}' },
        .{ .first = '\u{ED0}', .last = '\u{ED9}' },
        .{ .first = '\u{F20}', .last = '\u{F29}' },
        .{ .first = '\u{1040}', .last = '\u{1049}' },
        .{ .first = '\u{1090}', .last = '\u{1099}' },
        .{ .first = '\u{17E0}', .last = '\u{17E9}' },
        .{ .first = '\u{1810}', .last = '\u{1819}' },
        .{ .first = '\u{1946}', .last = '\u{194F}' },
        .{ .first = '\u{19D0}', .last = '\u{19D9}' },
        .{ .first = '\u{1B50}', .last = '\u{1B59}' },
        .{ .first = '\u{1BB0}', .last = '\u{1BB9}' },
        .{ .first = '\u{1C40}', .last = '\u{1C49}' },
        .{ .first = '\u{1C50}', .last = '\u{1C59}' },
        .{ .first = '\u{A620}', .last = '\u{A629}' },
        .{ .first = '\u{A8D0}', .last = '\u{A8D9}' },
        .{ .first = '\u{A900}', .last = '\u{A909}' },
        .{ .first = '\u{AA50}', .last = '\u{AA59}' },
        .{ .first = '\u{FF10}', .last = '\u{FF19}' },
    };
    for (digit_ranges) |range| {
        if (c >= range.first and c <= range.last) return true;
    }
    return false;
}
|
||||
588
src/resinator/windows1252.zig
Normal file
588
src/resinator/windows1252.zig
Normal file
@ -0,0 +1,588 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// Reads Windows-1252 bytes from `reader` until the end of the stream and writes
/// their UTF-8 encoding to `writer`.
/// Returns the number of UTF-8 bytes written. Any reader error other than
/// `error.EndOfStream`, and any writer error, is returned to the caller.
pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize {
    var total: usize = 0;
    var encoded: [3]u8 = undefined;
    while (true) {
        const byte = reader.readByte() catch |err| switch (err) {
            error.EndOfStream => break,
            else => |e| return e,
        };
        const codepoint = toCodepoint(byte);
        if (codepoint > 0x7F) {
            // `toCodepoint` yields a u16, so the encoding takes at most 3 bytes.
            const len = std.unicode.utf8Encode(codepoint, &encoded) catch unreachable;
            try writer.writeAll(encoded[0..len]);
            total += len;
            continue;
        }
        // ASCII is identical in both encodings and is passed through as-is.
        try writer.writeByte(byte);
        total += 1;
    }
    return total;
}
|
||||
|
||||
/// Converts the Windows-1252 encoded `win1252_str` into `u16` code units, one per
/// input byte, by mapping each byte through `toCodepoint`.
/// Returns a newly allocated, 0-terminated slice of exactly `win1252_str.len`
/// code units. The caller owns the result and must free it with `allocator`.
/// The only possible failure is the allocation itself.
pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 {
    // Guaranteed to need exactly the same number of code units as Windows-1252 bytes
    const utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0);
    // Nothing below can fail, so no errdefer is needed for the allocation.
    for (win1252_str, utf16_slice) |c, *code_unit| {
        code_unit.* = toCodepoint(c);
    }
    return utf16_slice;
}
|
||||
|
||||
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
///
/// Maps a single Windows-1252 byte to its codepoint. Bytes outside 0x80...0x9F,
/// and the bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D, map to themselves.
pub fn toCodepoint(c: u8) u16 {
    // Codepoints for the bytes 0x80 through 0x9F, indexed by (byte - 0x80).
    const high_block = [32]u16{
        0x20ac, // 0x80: Euro Sign
        0x0081, // 0x81: maps to itself
        0x201a, // 0x82: Single Low-9 Quotation Mark
        0x0192, // 0x83: Latin Small Letter F With Hook
        0x201e, // 0x84: Double Low-9 Quotation Mark
        0x2026, // 0x85: Horizontal Ellipsis
        0x2020, // 0x86: Dagger
        0x2021, // 0x87: Double Dagger
        0x02c6, // 0x88: Modifier Letter Circumflex Accent
        0x2030, // 0x89: Per Mille Sign
        0x0160, // 0x8a: Latin Capital Letter S With Caron
        0x2039, // 0x8b: Single Left-Pointing Angle Quotation Mark
        0x0152, // 0x8c: Latin Capital Ligature Oe
        0x008d, // 0x8d: maps to itself
        0x017d, // 0x8e: Latin Capital Letter Z With Caron
        0x008f, // 0x8f: maps to itself
        0x0090, // 0x90: maps to itself
        0x2018, // 0x91: Left Single Quotation Mark
        0x2019, // 0x92: Right Single Quotation Mark
        0x201c, // 0x93: Left Double Quotation Mark
        0x201d, // 0x94: Right Double Quotation Mark
        0x2022, // 0x95: Bullet
        0x2013, // 0x96: En Dash
        0x2014, // 0x97: Em Dash
        0x02dc, // 0x98: Small Tilde
        0x2122, // 0x99: Trade Mark Sign
        0x0161, // 0x9a: Latin Small Letter S With Caron
        0x203a, // 0x9b: Single Right-Pointing Angle Quotation Mark
        0x0153, // 0x9c: Latin Small Ligature Oe
        0x009d, // 0x9d: maps to itself
        0x017e, // 0x9e: Latin Small Letter Z With Caron
        0x0178, // 0x9f: Latin Capital Letter Y With Diaeresis
    };
    if (c >= 0x80 and c <= 0x9F) return high_block[c - 0x80];
    return c;
}
|
||||
|
||||
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
|
||||
/// Plus some mappings found empirically by iterating all codepoints:
|
||||
/// 0x2007 => 0xA0, // Figure Space
|
||||
/// 0x2008 => ' ', // Punctuation Space
|
||||
/// 0x2009 => ' ', // Thin Space
|
||||
/// 0x200A => ' ', // Hair Space
|
||||
/// 0x2012 => '-', // Figure Dash
|
||||
/// 0x2015 => '-', // Horizontal Bar
|
||||
/// 0x201B => '\'', // Single High-reversed-9 Quotation Mark
|
||||
/// 0x201F => '"', // Double High-reversed-9 Quotation Mark
|
||||
/// 0x202F => 0xA0, // Narrow No-Break Space
|
||||
/// 0x2033 => '"', // Double Prime
|
||||
/// 0x2036 => '"', // Reversed Double Prime
|
||||
pub fn bestFitFromCodepoint(codepoint: u21) ?u8 {
|
||||
return switch (codepoint) {
|
||||
0x00...0x7F,
|
||||
0x81,
|
||||
0x8D,
|
||||
0x8F,
|
||||
0x90,
|
||||
0x9D,
|
||||
0xA0...0xFF,
|
||||
=> @intCast(codepoint),
|
||||
0x0100 => 0x41, // Latin Capital Letter A With Macron
|
||||
0x0101 => 0x61, // Latin Small Letter A With Macron
|
||||
0x0102 => 0x41, // Latin Capital Letter A With Breve
|
||||
0x0103 => 0x61, // Latin Small Letter A With Breve
|
||||
0x0104 => 0x41, // Latin Capital Letter A With Ogonek
|
||||
0x0105 => 0x61, // Latin Small Letter A With Ogonek
|
||||
0x0106 => 0x43, // Latin Capital Letter C With Acute
|
||||
0x0107 => 0x63, // Latin Small Letter C With Acute
|
||||
0x0108 => 0x43, // Latin Capital Letter C With Circumflex
|
||||
0x0109 => 0x63, // Latin Small Letter C With Circumflex
|
||||
0x010a => 0x43, // Latin Capital Letter C With Dot Above
|
||||
0x010b => 0x63, // Latin Small Letter C With Dot Above
|
||||
0x010c => 0x43, // Latin Capital Letter C With Caron
|
||||
0x010d => 0x63, // Latin Small Letter C With Caron
|
||||
0x010e => 0x44, // Latin Capital Letter D With Caron
|
||||
0x010f => 0x64, // Latin Small Letter D With Caron
|
||||
0x0110 => 0xd0, // Latin Capital Letter D With Stroke
|
||||
0x0111 => 0x64, // Latin Small Letter D With Stroke
|
||||
0x0112 => 0x45, // Latin Capital Letter E With Macron
|
||||
0x0113 => 0x65, // Latin Small Letter E With Macron
|
||||
0x0114 => 0x45, // Latin Capital Letter E With Breve
|
||||
0x0115 => 0x65, // Latin Small Letter E With Breve
|
||||
0x0116 => 0x45, // Latin Capital Letter E With Dot Above
|
||||
0x0117 => 0x65, // Latin Small Letter E With Dot Above
|
||||
0x0118 => 0x45, // Latin Capital Letter E With Ogonek
|
||||
0x0119 => 0x65, // Latin Small Letter E With Ogonek
|
||||
0x011a => 0x45, // Latin Capital Letter E With Caron
|
||||
0x011b => 0x65, // Latin Small Letter E With Caron
|
||||
0x011c => 0x47, // Latin Capital Letter G With Circumflex
|
||||
0x011d => 0x67, // Latin Small Letter G With Circumflex
|
||||
0x011e => 0x47, // Latin Capital Letter G With Breve
|
||||
0x011f => 0x67, // Latin Small Letter G With Breve
|
||||
0x0120 => 0x47, // Latin Capital Letter G With Dot Above
|
||||
0x0121 => 0x67, // Latin Small Letter G With Dot Above
|
||||
0x0122 => 0x47, // Latin Capital Letter G With Cedilla
|
||||
0x0123 => 0x67, // Latin Small Letter G With Cedilla
|
||||
0x0124 => 0x48, // Latin Capital Letter H With Circumflex
|
||||
0x0125 => 0x68, // Latin Small Letter H With Circumflex
|
||||
0x0126 => 0x48, // Latin Capital Letter H With Stroke
|
||||
0x0127 => 0x68, // Latin Small Letter H With Stroke
|
||||
0x0128 => 0x49, // Latin Capital Letter I With Tilde
|
||||
0x0129 => 0x69, // Latin Small Letter I With Tilde
|
||||
0x012a => 0x49, // Latin Capital Letter I With Macron
|
||||
0x012b => 0x69, // Latin Small Letter I With Macron
|
||||
0x012c => 0x49, // Latin Capital Letter I With Breve
|
||||
0x012d => 0x69, // Latin Small Letter I With Breve
|
||||
0x012e => 0x49, // Latin Capital Letter I With Ogonek
|
||||
0x012f => 0x69, // Latin Small Letter I With Ogonek
|
||||
0x0130 => 0x49, // Latin Capital Letter I With Dot Above
|
||||
0x0131 => 0x69, // Latin Small Letter Dotless I
|
||||
0x0134 => 0x4a, // Latin Capital Letter J With Circumflex
|
||||
0x0135 => 0x6a, // Latin Small Letter J With Circumflex
|
||||
0x0136 => 0x4b, // Latin Capital Letter K With Cedilla
|
||||
0x0137 => 0x6b, // Latin Small Letter K With Cedilla
|
||||
0x0139 => 0x4c, // Latin Capital Letter L With Acute
|
||||
0x013a => 0x6c, // Latin Small Letter L With Acute
|
||||
0x013b => 0x4c, // Latin Capital Letter L With Cedilla
|
||||
0x013c => 0x6c, // Latin Small Letter L With Cedilla
|
||||
0x013d => 0x4c, // Latin Capital Letter L With Caron
|
||||
0x013e => 0x6c, // Latin Small Letter L With Caron
|
||||
0x0141 => 0x4c, // Latin Capital Letter L With Stroke
|
||||
0x0142 => 0x6c, // Latin Small Letter L With Stroke
|
||||
0x0143 => 0x4e, // Latin Capital Letter N With Acute
|
||||
0x0144 => 0x6e, // Latin Small Letter N With Acute
|
||||
0x0145 => 0x4e, // Latin Capital Letter N With Cedilla
|
||||
0x0146 => 0x6e, // Latin Small Letter N With Cedilla
|
||||
0x0147 => 0x4e, // Latin Capital Letter N With Caron
|
||||
0x0148 => 0x6e, // Latin Small Letter N With Caron
|
||||
0x014c => 0x4f, // Latin Capital Letter O With Macron
|
||||
0x014d => 0x6f, // Latin Small Letter O With Macron
|
||||
0x014e => 0x4f, // Latin Capital Letter O With Breve
|
||||
0x014f => 0x6f, // Latin Small Letter O With Breve
|
||||
0x0150 => 0x4f, // Latin Capital Letter O With Double Acute
|
||||
0x0151 => 0x6f, // Latin Small Letter O With Double Acute
|
||||
0x0152 => 0x8c, // Latin Capital Ligature Oe
|
||||
0x0153 => 0x9c, // Latin Small Ligature Oe
|
||||
0x0154 => 0x52, // Latin Capital Letter R With Acute
|
||||
0x0155 => 0x72, // Latin Small Letter R With Acute
|
||||
0x0156 => 0x52, // Latin Capital Letter R With Cedilla
|
||||
0x0157 => 0x72, // Latin Small Letter R With Cedilla
|
||||
0x0158 => 0x52, // Latin Capital Letter R With Caron
|
||||
0x0159 => 0x72, // Latin Small Letter R With Caron
|
||||
0x015a => 0x53, // Latin Capital Letter S With Acute
|
||||
0x015b => 0x73, // Latin Small Letter S With Acute
|
||||
0x015c => 0x53, // Latin Capital Letter S With Circumflex
|
||||
0x015d => 0x73, // Latin Small Letter S With Circumflex
|
||||
0x015e => 0x53, // Latin Capital Letter S With Cedilla
|
||||
0x015f => 0x73, // Latin Small Letter S With Cedilla
|
||||
0x0160 => 0x8a, // Latin Capital Letter S With Caron
|
||||
0x0161 => 0x9a, // Latin Small Letter S With Caron
|
||||
0x0162 => 0x54, // Latin Capital Letter T With Cedilla
|
||||
0x0163 => 0x74, // Latin Small Letter T With Cedilla
|
||||
0x0164 => 0x54, // Latin Capital Letter T With Caron
|
||||
0x0165 => 0x74, // Latin Small Letter T With Caron
|
||||
0x0166 => 0x54, // Latin Capital Letter T With Stroke
|
||||
0x0167 => 0x74, // Latin Small Letter T With Stroke
|
||||
0x0168 => 0x55, // Latin Capital Letter U With Tilde
|
||||
0x0169 => 0x75, // Latin Small Letter U With Tilde
|
||||
0x016a => 0x55, // Latin Capital Letter U With Macron
|
||||
0x016b => 0x75, // Latin Small Letter U With Macron
|
||||
0x016c => 0x55, // Latin Capital Letter U With Breve
|
||||
0x016d => 0x75, // Latin Small Letter U With Breve
|
||||
0x016e => 0x55, // Latin Capital Letter U With Ring Above
|
||||
0x016f => 0x75, // Latin Small Letter U With Ring Above
|
||||
0x0170 => 0x55, // Latin Capital Letter U With Double Acute
|
||||
0x0171 => 0x75, // Latin Small Letter U With Double Acute
|
||||
0x0172 => 0x55, // Latin Capital Letter U With Ogonek
|
||||
0x0173 => 0x75, // Latin Small Letter U With Ogonek
|
||||
0x0174 => 0x57, // Latin Capital Letter W With Circumflex
|
||||
0x0175 => 0x77, // Latin Small Letter W With Circumflex
|
||||
0x0176 => 0x59, // Latin Capital Letter Y With Circumflex
|
||||
0x0177 => 0x79, // Latin Small Letter Y With Circumflex
|
||||
0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis
|
||||
0x0179 => 0x5a, // Latin Capital Letter Z With Acute
|
||||
0x017a => 0x7a, // Latin Small Letter Z With Acute
|
||||
0x017b => 0x5a, // Latin Capital Letter Z With Dot Above
|
||||
0x017c => 0x7a, // Latin Small Letter Z With Dot Above
|
||||
0x017d => 0x8e, // Latin Capital Letter Z With Caron
|
||||
0x017e => 0x9e, // Latin Small Letter Z With Caron
|
||||
0x0180 => 0x62, // Latin Small Letter B With Stroke
|
||||
0x0189 => 0xd0, // Latin Capital Letter African D
|
||||
0x0191 => 0x83, // Latin Capital Letter F With Hook
|
||||
0x0192 => 0x83, // Latin Small Letter F With Hook
|
||||
0x0197 => 0x49, // Latin Capital Letter I With Stroke
|
||||
0x019a => 0x6c, // Latin Small Letter L With Bar
|
||||
0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde
|
||||
0x01a0 => 0x4f, // Latin Capital Letter O With Horn
|
||||
0x01a1 => 0x6f, // Latin Small Letter O With Horn
|
||||
0x01ab => 0x74, // Latin Small Letter T With Palatal Hook
|
||||
0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook
|
||||
0x01af => 0x55, // Latin Capital Letter U With Horn
|
||||
0x01b0 => 0x75, // Latin Small Letter U With Horn
|
||||
0x01b6 => 0x7a, // Latin Small Letter Z With Stroke
|
||||
0x01c0 => 0x7c, // Latin Letter Dental Click
|
||||
0x01c3 => 0x21, // Latin Letter Retroflex Click
|
||||
0x01cd => 0x41, // Latin Capital Letter A With Caron
|
||||
0x01ce => 0x61, // Latin Small Letter A With Caron
|
||||
0x01cf => 0x49, // Latin Capital Letter I With Caron
|
||||
0x01d0 => 0x69, // Latin Small Letter I With Caron
|
||||
0x01d1 => 0x4f, // Latin Capital Letter O With Caron
|
||||
0x01d2 => 0x6f, // Latin Small Letter O With Caron
|
||||
0x01d3 => 0x55, // Latin Capital Letter U With Caron
|
||||
0x01d4 => 0x75, // Latin Small Letter U With Caron
|
||||
0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron
|
||||
0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron
|
||||
0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute
|
||||
0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute
|
||||
0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron
|
||||
0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron
|
||||
0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave
|
||||
0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave
|
||||
0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron
|
||||
0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron
|
||||
0x01e4 => 0x47, // Latin Capital Letter G With Stroke
|
||||
0x01e5 => 0x67, // Latin Small Letter G With Stroke
|
||||
0x01e6 => 0x47, // Latin Capital Letter G With Caron
|
||||
0x01e7 => 0x67, // Latin Small Letter G With Caron
|
||||
0x01e8 => 0x4b, // Latin Capital Letter K With Caron
|
||||
0x01e9 => 0x6b, // Latin Small Letter K With Caron
|
||||
0x01ea => 0x4f, // Latin Capital Letter O With Ogonek
|
||||
0x01eb => 0x6f, // Latin Small Letter O With Ogonek
|
||||
0x01ec => 0x4f, // Latin Capital Letter O With Ogonek And Macron
|
||||
0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron
|
||||
0x01f0 => 0x6a, // Latin Small Letter J With Caron
|
||||
0x0261 => 0x67, // Latin Small Letter Script G
|
||||
0x02b9 => 0x27, // Modifier Letter Prime
|
||||
0x02ba => 0x22, // Modifier Letter Double Prime
|
||||
0x02bc => 0x27, // Modifier Letter Apostrophe
|
||||
0x02c4 => 0x5e, // Modifier Letter Up Arrowhead
|
||||
0x02c6 => 0x88, // Modifier Letter Circumflex Accent
|
||||
0x02c8 => 0x27, // Modifier Letter Vertical Line
|
||||
0x02c9 => 0xaf, // Modifier Letter Macron
|
||||
0x02ca => 0xb4, // Modifier Letter Acute Accent
|
||||
0x02cb => 0x60, // Modifier Letter Grave Accent
|
||||
0x02cd => 0x5f, // Modifier Letter Low Macron
|
||||
0x02da => 0xb0, // Ring Above
|
||||
0x02dc => 0x98, // Small Tilde
|
||||
0x0300 => 0x60, // Combining Grave Accent
|
||||
0x0301 => 0xb4, // Combining Acute Accent
|
||||
0x0302 => 0x5e, // Combining Circumflex Accent
|
||||
0x0303 => 0x7e, // Combining Tilde
|
||||
0x0304 => 0xaf, // Combining Macron
|
||||
0x0305 => 0xaf, // Combining Overline
|
||||
0x0308 => 0xa8, // Combining Diaeresis
|
||||
0x030a => 0xb0, // Combining Ring Above
|
||||
0x030e => 0x22, // Combining Double Vertical Line Above
|
||||
0x0327 => 0xb8, // Combining Cedilla
|
||||
0x0331 => 0x5f, // Combining Macron Below
|
||||
0x0332 => 0x5f, // Combining Low Line
|
||||
0x037e => 0x3b, // Greek Question Mark
|
||||
0x0393 => 0x47, // Greek Capital Letter Gamma
|
||||
0x0398 => 0x54, // Greek Capital Letter Theta
|
||||
0x03a3 => 0x53, // Greek Capital Letter Sigma
|
||||
0x03a6 => 0x46, // Greek Capital Letter Phi
|
||||
0x03a9 => 0x4f, // Greek Capital Letter Omega
|
||||
0x03b1 => 0x61, // Greek Small Letter Alpha
|
||||
0x03b2 => 0xdf, // Greek Small Letter Beta
|
||||
0x03b4 => 0x64, // Greek Small Letter Delta
|
||||
0x03b5 => 0x65, // Greek Small Letter Epsilon
|
||||
0x03bc => 0xb5, // Greek Small Letter Mu
|
||||
0x03c0 => 0x70, // Greek Small Letter Pi
|
||||
0x03c3 => 0x73, // Greek Small Letter Sigma
|
||||
0x03c4 => 0x74, // Greek Small Letter Tau
|
||||
0x03c6 => 0x66, // Greek Small Letter Phi
|
||||
0x04bb => 0x68, // Cyrillic Small Letter Shha
|
||||
0x0589 => 0x3a, // Armenian Full Stop
|
||||
0x066a => 0x25, // Arabic Percent Sign
|
||||
0x2000 => 0x20, // En Quad
|
||||
0x2001 => 0x20, // Em Quad
|
||||
0x2002 => 0x20, // En Space
|
||||
0x2003 => 0x20, // Em Space
|
||||
0x2004 => 0x20, // Three-Per-Em Space
|
||||
0x2005 => 0x20, // Four-Per-Em Space
|
||||
0x2006 => 0x20, // Six-Per-Em Space
|
||||
0x2010 => 0x2d, // Hyphen
|
||||
0x2011 => 0x2d, // Non-Breaking Hyphen
|
||||
0x2013 => 0x96, // En Dash
|
||||
0x2014 => 0x97, // Em Dash
|
||||
0x2017 => 0x3d, // Double Low Line
|
||||
0x2018 => 0x91, // Left Single Quotation Mark
|
||||
0x2019 => 0x92, // Right Single Quotation Mark
|
||||
0x201a => 0x82, // Single Low-9 Quotation Mark
|
||||
0x201c => 0x93, // Left Double Quotation Mark
|
||||
0x201d => 0x94, // Right Double Quotation Mark
|
||||
0x201e => 0x84, // Double Low-9 Quotation Mark
|
||||
0x2020 => 0x86, // Dagger
|
||||
0x2021 => 0x87, // Double Dagger
|
||||
0x2022 => 0x95, // Bullet
|
||||
0x2024 => 0xb7, // One Dot Leader
|
||||
0x2026 => 0x85, // Horizontal Ellipsis
|
||||
0x2030 => 0x89, // Per Mille Sign
|
||||
0x2032 => 0x27, // Prime
|
||||
0x2035 => 0x60, // Reversed Prime
|
||||
0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark
|
||||
0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark
|
||||
0x2044 => 0x2f, // Fraction Slash
|
||||
0x2070 => 0xb0, // Superscript Zero
|
||||
0x2074 => 0x34, // Superscript Four
|
||||
0x2075 => 0x35, // Superscript Five
|
||||
0x2076 => 0x36, // Superscript Six
|
||||
0x2077 => 0x37, // Superscript Seven
|
||||
0x2078 => 0x38, // Superscript Eight
|
||||
0x207f => 0x6e, // Superscript Latin Small Letter N
|
||||
0x2080 => 0x30, // Subscript Zero
|
||||
0x2081 => 0x31, // Subscript One
|
||||
0x2082 => 0x32, // Subscript Two
|
||||
0x2083 => 0x33, // Subscript Three
|
||||
0x2084 => 0x34, // Subscript Four
|
||||
0x2085 => 0x35, // Subscript Five
|
||||
0x2086 => 0x36, // Subscript Six
|
||||
0x2087 => 0x37, // Subscript Seven
|
||||
0x2088 => 0x38, // Subscript Eight
|
||||
0x2089 => 0x39, // Subscript Nine
|
||||
0x20ac => 0x80, // Euro Sign
|
||||
0x20a1 => 0xa2, // Colon Sign
|
||||
0x20a4 => 0xa3, // Lira Sign
|
||||
0x20a7 => 0x50, // Peseta Sign
|
||||
0x2102 => 0x43, // Double-Struck Capital C
|
||||
0x2107 => 0x45, // Euler Constant
|
||||
0x210a => 0x67, // Script Small G
|
||||
0x210b => 0x48, // Script Capital H
|
||||
0x210c => 0x48, // Black-Letter Capital H
|
||||
0x210d => 0x48, // Double-Struck Capital H
|
||||
0x210e => 0x68, // Planck Constant
|
||||
0x2110 => 0x49, // Script Capital I
|
||||
0x2111 => 0x49, // Black-Letter Capital I
|
||||
0x2112 => 0x4c, // Script Capital L
|
||||
0x2113 => 0x6c, // Script Small L
|
||||
0x2115 => 0x4e, // Double-Struck Capital N
|
||||
0x2118 => 0x50, // Script Capital P
|
||||
0x2119 => 0x50, // Double-Struck Capital P
|
||||
0x211a => 0x51, // Double-Struck Capital Q
|
||||
0x211b => 0x52, // Script Capital R
|
||||
0x211c => 0x52, // Black-Letter Capital R
|
||||
0x211d => 0x52, // Double-Struck Capital R
|
||||
0x2122 => 0x99, // Trade Mark Sign
|
||||
0x2124 => 0x5a, // Double-Struck Capital Z
|
||||
0x2128 => 0x5a, // Black-Letter Capital Z
|
||||
0x212a => 0x4b, // Kelvin Sign
|
||||
0x212b => 0xc5, // Angstrom Sign
|
||||
0x212c => 0x42, // Script Capital B
|
||||
0x212d => 0x43, // Black-Letter Capital C
|
||||
0x212e => 0x65, // Estimated Symbol
|
||||
0x212f => 0x65, // Script Small E
|
||||
0x2130 => 0x45, // Script Capital E
|
||||
0x2131 => 0x46, // Script Capital F
|
||||
0x2133 => 0x4d, // Script Capital M
|
||||
0x2134 => 0x6f, // Script Small O
|
||||
0x2205 => 0xd8, // Empty Set
|
||||
0x2212 => 0x2d, // Minus Sign
|
||||
0x2213 => 0xb1, // Minus-Or-Plus Sign
|
||||
0x2215 => 0x2f, // Division Slash
|
||||
0x2216 => 0x5c, // Set Minus
|
||||
0x2217 => 0x2a, // Asterisk Operator
|
||||
0x2218 => 0xb0, // Ring Operator
|
||||
0x2219 => 0xb7, // Bullet Operator
|
||||
0x221a => 0x76, // Square Root
|
||||
0x221e => 0x38, // Infinity
|
||||
0x2223 => 0x7c, // Divides
|
||||
0x2229 => 0x6e, // Intersection
|
||||
0x2236 => 0x3a, // Ratio
|
||||
0x223c => 0x7e, // Tilde Operator
|
||||
0x2248 => 0x98, // Almost Equal To
|
||||
0x2261 => 0x3d, // Identical To
|
||||
0x2264 => 0x3d, // Less-Than Or Equal To
|
||||
0x2265 => 0x3d, // Greater-Than Or Equal To
|
||||
0x226a => 0xab, // Much Less-Than
|
||||
0x226b => 0xbb, // Much Greater-Than
|
||||
0x22c5 => 0xb7, // Dot Operator
|
||||
0x2302 => 0xa6, // House
|
||||
0x2303 => 0x5e, // Up Arrowhead
|
||||
0x2310 => 0xac, // Reversed Not Sign
|
||||
0x2320 => 0x28, // Top Half Integral
|
||||
0x2321 => 0x29, // Bottom Half Integral
|
||||
0x2329 => 0x3c, // Left-Pointing Angle Bracket
|
||||
0x232a => 0x3e, // Right-Pointing Angle Bracket
|
||||
0x2500 => 0x2d, // Box Drawings Light Horizontal
|
||||
0x2502 => 0xa6, // Box Drawings Light Vertical
|
||||
0x250c => 0x2b, // Box Drawings Light Down And Right
|
||||
0x2510 => 0x2b, // Box Drawings Light Down And Left
|
||||
0x2514 => 0x2b, // Box Drawings Light Up And Right
|
||||
0x2518 => 0x2b, // Box Drawings Light Up And Left
|
||||
0x251c => 0x2b, // Box Drawings Light Vertical And Right
|
||||
0x2524 => 0xa6, // Box Drawings Light Vertical And Left
|
||||
0x252c => 0x2d, // Box Drawings Light Down And Horizontal
|
||||
0x2534 => 0x2d, // Box Drawings Light Up And Horizontal
|
||||
0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal
|
||||
0x2550 => 0x2d, // Box Drawings Double Horizontal
|
||||
0x2551 => 0xa6, // Box Drawings Double Vertical
|
||||
0x2552 => 0x2b, // Box Drawings Down Single And Right Double
|
||||
0x2553 => 0x2b, // Box Drawings Down Double And Right Single
|
||||
0x2554 => 0x2b, // Box Drawings Double Down And Right
|
||||
0x2555 => 0x2b, // Box Drawings Down Single And Left Double
|
||||
0x2556 => 0x2b, // Box Drawings Down Double And Left Single
|
||||
0x2557 => 0x2b, // Box Drawings Double Down And Left
|
||||
0x2558 => 0x2b, // Box Drawings Up Single And Right Double
|
||||
0x2559 => 0x2b, // Box Drawings Up Double And Right Single
|
||||
0x255a => 0x2b, // Box Drawings Double Up And Right
|
||||
0x255b => 0x2b, // Box Drawings Up Single And Left Double
|
||||
0x255c => 0x2b, // Box Drawings Up Double And Left Single
|
||||
0x255d => 0x2b, // Box Drawings Double Up And Left
|
||||
0x255e => 0xa6, // Box Drawings Vertical Single And Right Double
|
||||
0x255f => 0xa6, // Box Drawings Vertical Double And Right Single
|
||||
0x2560 => 0xa6, // Box Drawings Double Vertical And Right
|
||||
0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double
|
||||
0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single
|
||||
0x2563 => 0xa6, // Box Drawings Double Vertical And Left
|
||||
0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double
|
||||
0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single
|
||||
0x2566 => 0x2d, // Box Drawings Double Down And Horizontal
|
||||
0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double
|
||||
0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single
|
||||
0x2569 => 0x2d, // Box Drawings Double Up And Horizontal
|
||||
0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double
|
||||
0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single
|
||||
0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal
|
||||
0x2580 => 0xaf, // Upper Half Block
|
||||
0x2584 => 0x5f, // Lower Half Block
|
||||
0x2588 => 0xa6, // Full Block
|
||||
0x258c => 0xa6, // Left Half Block
|
||||
0x2590 => 0xa6, // Right Half Block
|
||||
0x2591 => 0xa6, // Light Shade
|
||||
0x2592 => 0xa6, // Medium Shade
|
||||
0x2593 => 0xa6, // Dark Shade
|
||||
0x25a0 => 0xa6, // Black Square
|
||||
0x263c => 0xa4, // White Sun With Rays
|
||||
0x2758 => 0x7c, // Light Vertical Bar
|
||||
0x3000 => 0x20, // Ideographic Space
|
||||
0x3008 => 0x3c, // Left Angle Bracket
|
||||
0x3009 => 0x3e, // Right Angle Bracket
|
||||
0x300a => 0xab, // Left Double Angle Bracket
|
||||
0x300b => 0xbb, // Right Double Angle Bracket
|
||||
0x301a => 0x5b, // Left White Square Bracket
|
||||
0x301b => 0x5d, // Right White Square Bracket
|
||||
0x30fb => 0xb7, // Katakana Middle Dot
|
||||
0xff01 => 0x21, // Fullwidth Exclamation Mark
|
||||
0xff02 => 0x22, // Fullwidth Quotation Mark
|
||||
0xff03 => 0x23, // Fullwidth Number Sign
|
||||
0xff04 => 0x24, // Fullwidth Dollar Sign
|
||||
0xff05 => 0x25, // Fullwidth Percent Sign
|
||||
0xff06 => 0x26, // Fullwidth Ampersand
|
||||
0xff07 => 0x27, // Fullwidth Apostrophe
|
||||
0xff08 => 0x28, // Fullwidth Left Parenthesis
|
||||
0xff09 => 0x29, // Fullwidth Right Parenthesis
|
||||
0xff0a => 0x2a, // Fullwidth Asterisk
|
||||
0xff0b => 0x2b, // Fullwidth Plus Sign
|
||||
0xff0c => 0x2c, // Fullwidth Comma
|
||||
0xff0d => 0x2d, // Fullwidth Hyphen-Minus
|
||||
0xff0e => 0x2e, // Fullwidth Full Stop
|
||||
0xff0f => 0x2f, // Fullwidth Solidus
|
||||
0xff10 => 0x30, // Fullwidth Digit Zero
|
||||
0xff11 => 0x31, // Fullwidth Digit One
|
||||
0xff12 => 0x32, // Fullwidth Digit Two
|
||||
0xff13 => 0x33, // Fullwidth Digit Three
|
||||
0xff14 => 0x34, // Fullwidth Digit Four
|
||||
0xff15 => 0x35, // Fullwidth Digit Five
|
||||
0xff16 => 0x36, // Fullwidth Digit Six
|
||||
0xff17 => 0x37, // Fullwidth Digit Seven
|
||||
0xff18 => 0x38, // Fullwidth Digit Eight
|
||||
0xff19 => 0x39, // Fullwidth Digit Nine
|
||||
0xff1a => 0x3a, // Fullwidth Colon
|
||||
0xff1b => 0x3b, // Fullwidth Semicolon
|
||||
0xff1c => 0x3c, // Fullwidth Less-Than Sign
|
||||
0xff1d => 0x3d, // Fullwidth Equals Sign
|
||||
0xff1e => 0x3e, // Fullwidth Greater-Than Sign
|
||||
0xff1f => 0x3f, // Fullwidth Question Mark
|
||||
0xff20 => 0x40, // Fullwidth Commercial At
|
||||
0xff21 => 0x41, // Fullwidth Latin Capital Letter A
|
||||
0xff22 => 0x42, // Fullwidth Latin Capital Letter B
|
||||
0xff23 => 0x43, // Fullwidth Latin Capital Letter C
|
||||
0xff24 => 0x44, // Fullwidth Latin Capital Letter D
|
||||
0xff25 => 0x45, // Fullwidth Latin Capital Letter E
|
||||
0xff26 => 0x46, // Fullwidth Latin Capital Letter F
|
||||
0xff27 => 0x47, // Fullwidth Latin Capital Letter G
|
||||
0xff28 => 0x48, // Fullwidth Latin Capital Letter H
|
||||
0xff29 => 0x49, // Fullwidth Latin Capital Letter I
|
||||
0xff2a => 0x4a, // Fullwidth Latin Capital Letter J
|
||||
0xff2b => 0x4b, // Fullwidth Latin Capital Letter K
|
||||
0xff2c => 0x4c, // Fullwidth Latin Capital Letter L
|
||||
0xff2d => 0x4d, // Fullwidth Latin Capital Letter M
|
||||
0xff2e => 0x4e, // Fullwidth Latin Capital Letter N
|
||||
0xff2f => 0x4f, // Fullwidth Latin Capital Letter O
|
||||
0xff30 => 0x50, // Fullwidth Latin Capital Letter P
|
||||
0xff31 => 0x51, // Fullwidth Latin Capital Letter Q
|
||||
0xff32 => 0x52, // Fullwidth Latin Capital Letter R
|
||||
0xff33 => 0x53, // Fullwidth Latin Capital Letter S
|
||||
0xff34 => 0x54, // Fullwidth Latin Capital Letter T
|
||||
0xff35 => 0x55, // Fullwidth Latin Capital Letter U
|
||||
0xff36 => 0x56, // Fullwidth Latin Capital Letter V
|
||||
0xff37 => 0x57, // Fullwidth Latin Capital Letter W
|
||||
0xff38 => 0x58, // Fullwidth Latin Capital Letter X
|
||||
0xff39 => 0x59, // Fullwidth Latin Capital Letter Y
|
||||
0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z
|
||||
0xff3b => 0x5b, // Fullwidth Left Square Bracket
|
||||
0xff3c => 0x5c, // Fullwidth Reverse Solidus
|
||||
0xff3d => 0x5d, // Fullwidth Right Square Bracket
|
||||
0xff3e => 0x5e, // Fullwidth Circumflex Accent
|
||||
0xff3f => 0x5f, // Fullwidth Low Line
|
||||
0xff40 => 0x60, // Fullwidth Grave Accent
|
||||
0xff41 => 0x61, // Fullwidth Latin Small Letter A
|
||||
0xff42 => 0x62, // Fullwidth Latin Small Letter B
|
||||
0xff43 => 0x63, // Fullwidth Latin Small Letter C
|
||||
0xff44 => 0x64, // Fullwidth Latin Small Letter D
|
||||
0xff45 => 0x65, // Fullwidth Latin Small Letter E
|
||||
0xff46 => 0x66, // Fullwidth Latin Small Letter F
|
||||
0xff47 => 0x67, // Fullwidth Latin Small Letter G
|
||||
0xff48 => 0x68, // Fullwidth Latin Small Letter H
|
||||
0xff49 => 0x69, // Fullwidth Latin Small Letter I
|
||||
0xff4a => 0x6a, // Fullwidth Latin Small Letter J
|
||||
0xff4b => 0x6b, // Fullwidth Latin Small Letter K
|
||||
0xff4c => 0x6c, // Fullwidth Latin Small Letter L
|
||||
0xff4d => 0x6d, // Fullwidth Latin Small Letter M
|
||||
0xff4e => 0x6e, // Fullwidth Latin Small Letter N
|
||||
0xff4f => 0x6f, // Fullwidth Latin Small Letter O
|
||||
0xff50 => 0x70, // Fullwidth Latin Small Letter P
|
||||
0xff51 => 0x71, // Fullwidth Latin Small Letter Q
|
||||
0xff52 => 0x72, // Fullwidth Latin Small Letter R
|
||||
0xff53 => 0x73, // Fullwidth Latin Small Letter S
|
||||
0xff54 => 0x74, // Fullwidth Latin Small Letter T
|
||||
0xff55 => 0x75, // Fullwidth Latin Small Letter U
|
||||
0xff56 => 0x76, // Fullwidth Latin Small Letter V
|
||||
0xff57 => 0x77, // Fullwidth Latin Small Letter W
|
||||
0xff58 => 0x78, // Fullwidth Latin Small Letter X
|
||||
0xff59 => 0x79, // Fullwidth Latin Small Letter Y
|
||||
0xff5a => 0x7a, // Fullwidth Latin Small Letter Z
|
||||
0xff5b => 0x7b, // Fullwidth Left Curly Bracket
|
||||
0xff5c => 0x7c, // Fullwidth Vertical Line
|
||||
0xff5d => 0x7d, // Fullwidth Right Curly Bracket
|
||||
0xff5e => 0x7e, // Fullwidth Tilde
|
||||
// Not in the best fit mapping, but RC uses these mappings too
|
||||
0x2007 => 0xA0, // Figure Space
|
||||
0x2008 => ' ', // Punctuation Space
|
||||
0x2009 => ' ', // Thin Space
|
||||
0x200A => ' ', // Hair Space
|
||||
0x2012 => '-', // Figure Dash
|
||||
0x2015 => '-', // Horizontal Bar
|
||||
0x201B => '\'', // Single High-reversed-9 Quotation Mark
|
||||
0x201F => '"', // Double High-reversed-9 Quotation Mark
|
||||
0x202F => 0xA0, // Narrow No-Break Space
|
||||
0x2033 => '"', // Double Prime
|
||||
0x2036 => '"', // Reversed Double Prime
|
||||
else => null,
|
||||
};
|
||||
}
|
||||
|
||||
test "windows-1252 to utf8" {
    // Output accumulates in a growable buffer backed by the testing
    // allocator, which reports any leak when the test ends.
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();

    // A leading 0x81, a run of plain ASCII, then a selection of bytes from
    // the 0x80-0xFF range.
    const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
    // One UTF-8 encoded character per input byte, in the same order. 0x81 is
    // expected to come out as U+0081 (bytes C2 81). The input bytes 0xD7 and
    // 0xD8 correspond to the two separate characters '×' and 'Ø'.
    const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";

    var fbs = std.io.fixedBufferStream(input_windows1252);
    const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader());

    // Both the produced bytes and the reported byte count must match.
    try std.testing.expectEqualStrings(expected_utf8, buf.items);
    try std.testing.expectEqual(expected_utf8.len, bytes_written);
}
|
||||
@ -194,6 +194,10 @@ pub const build_cases = [_]BuildCase{
|
||||
.build_root = "test/standalone/load_dynamic_library",
|
||||
.import = @import("standalone/load_dynamic_library/build.zig"),
|
||||
},
|
||||
.{
|
||||
.build_root = "test/standalone/windows_resources",
|
||||
.import = @import("standalone/windows_resources/build.zig"),
|
||||
},
|
||||
.{
|
||||
.build_root = "test/standalone/windows_spawn",
|
||||
.import = @import("standalone/windows_spawn/build.zig"),
|
||||
|
||||
40
test/standalone/windows_resources/build.zig
Normal file
40
test/standalone/windows_resources/build.zig
Normal file
@ -0,0 +1,40 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// Registers the "test" step, makes it the default step, and adds one
/// resource-compiling executable per (target, rc_includes) combination.
pub fn build(b: *std.Build) void {
    const test_step = b.step("test", "Test it");
    b.default_step = test_step;

    const targets = [_]std.zig.CrossTarget{
        // Native target: every field left at its default.
        .{},
        // Explicit x86_64 Windows target using the GNU ABI.
        .{
            .cpu_arch = .x86_64,
            .os_tag = .windows,
            .abi = .gnu,
        },
    };

    // Same registration order as spelling the four calls out by hand:
    // native/.any, cross/.any, native/.gnu, cross/.gnu.
    for (targets) |target| add(b, target, .any, test_step);
    for (targets) |target| add(b, target, .gnu, test_step);
}
|
||||
|
||||
/// Adds a Debug build of `main.zig` for `target` that also compiles
/// `res/zig.rc`, and makes `test_step` depend on that compile step.
/// `rc_includes` selects the include-path autodetection mode for the .rc file.
fn add(b: *std.Build, target: std.zig.CrossTarget, rc_includes: enum { any, gnu }, test_step: *std.Build.Step) void {
    const exe = b.addExecutable(.{
        .name = "zig_resource_test",
        .root_source_file = .{ .path = "main.zig" },
        .target = target,
        .optimize = .Debug,
    });

    // The parameter uses its own two-value enum, so each tag is translated
    // to the matching tag of the `rc_includes` field's enum type.
    exe.rc_includes = switch (rc_includes) {
        .gnu => .gnu,
        .any => .any,
    };

    exe.addWin32ResourceFile(.{
        .file = .{ .path = "res/zig.rc" },
        .flags = &.{"/c65001"}, // UTF-8 code page
    });

    // The result is discarded on purpose; the call is presumably made only
    // so that the binary is marked as needed and actually emitted — TODO confirm.
    _ = exe.getEmittedBin();

    test_step.dependOn(&exe.step);
}
|
||||
5
test/standalone/windows_resources/main.zig
Normal file
5
test/standalone/windows_resources/main.zig
Normal file
@ -0,0 +1,5 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// Entry point of the resource test executable: prints a single fixed line
/// via `std.debug.print`.
pub fn main() !void {
    const subject = "codebase";
    std.debug.print("All your {s} are belong to us.\n", .{subject});
}
|
||||
1
test/standalone/windows_resources/res/hello.bin
Normal file
1
test/standalone/windows_resources/res/hello.bin
Normal file
@ -0,0 +1 @@
|
||||
abcdefg
|
||||
1
test/standalone/windows_resources/res/sub/sub.rc
Normal file
1
test/standalone/windows_resources/res/sub/sub.rc
Normal file
@ -0,0 +1 @@
|
||||
2 RCDATA hello.bin
|
||||
BIN
test/standalone/windows_resources/res/zig.ico
Normal file
BIN
test/standalone/windows_resources/res/zig.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 175 KiB |
40
test/standalone/windows_resources/res/zig.rc
Normal file
40
test/standalone/windows_resources/res/zig.rc
Normal file
@ -0,0 +1,40 @@
|
||||
#define ICO_ID 1
|
||||
|
||||
// Nothing from windows.h is used in this .rc file,
|
||||
// but it's common to include it within a .rc file
|
||||
// so this makes sure that it can be found on
|
||||
// all platforms.
|
||||
#include "windows.h"
|
||||
|
||||
ICO_ID ICON "zig.ico"
|
||||
|
||||
1 VERSIONINFO
|
||||
FILEVERSION 1L,0,0,2
|
||||
PRODUCTVERSION 1,0,0,1
|
||||
FILEFLAGSMASK 0x3fL
|
||||
FILEFLAGS 0x1L
|
||||
FILEOS 0x4L
|
||||
FILETYPE 0x1L
|
||||
FILESUBTYPE 0x0L
|
||||
BEGIN
|
||||
BLOCK "StringFileInfo"
|
||||
BEGIN
|
||||
BLOCK "040904e4"
|
||||
BEGIN
|
||||
VALUE "CompanyName", "Zig"
|
||||
VALUE "FileDescription", "My cool zig program"
|
||||
VALUE "FileVersion", "1.0.0.1"
|
||||
VALUE "InternalName", "zig-ico.exe"
|
||||
VALUE "LegalCopyright", "(c) no one"
|
||||
VALUE "OriginalFilename", "zig-ico.exe"
|
||||
VALUE "ProductName", "Zig but with an icon"
|
||||
VALUE "ProductVersion", "1.0.0.1"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
BEGIN
|
||||
VALUE "Translation", 0x409, 1252
|
||||
END
|
||||
END
|
||||
|
||||
#include "sub/sub.rc"
|
||||
Loading…
x
Reference in New Issue
Block a user