Merge pull request #17069 from squeek502/resinator

Add a `.rc` -> `.res` compiler to the Zig compiler
This commit is contained in:
Andrew Kelley 2023-09-22 12:18:50 -07:00 committed by GitHub
commit 3fc7413574
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 17043 additions and 13 deletions

View File

@ -90,6 +90,14 @@ is_linking_libc: bool,
is_linking_libcpp: bool,
vcpkg_bin_path: ?[]const u8 = null,
// keep in sync with src/Compilation.zig:RcIncludes
/// Behavior of automatic detection of include directories when compiling .rc files.
/// any: Use MSVC if available, fall back to MinGW.
/// msvc: Use MSVC include paths (must be present on the system).
/// gnu: Use MinGW include paths (distributed with Zig).
/// none: Do not use any autodetected include paths.
rc_includes: enum { any, msvc, gnu, none } = .any,
installed_path: ?[]const u8,
/// Base address for an executable image.
@ -221,6 +229,26 @@ pub const CSourceFile = struct {
}
};
pub const RcSourceFile = struct {
    /// The `.rc` file to compile.
    file: LazyPath,
    /// Any option that rc.exe accepts will work here, with the exception of:
    /// - `/fo`: The output filename is set by the build system
    /// - Any MUI-related option
    /// https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line-
    ///
    /// Implicitly defined options:
    ///  /x (ignore the INCLUDE environment variable)
    ///  /D_DEBUG or /DNDEBUG depending on the optimization mode
    flags: []const []const u8 = &.{},

    /// Returns a copy of `self` whose path and flag strings are duplicated
    /// through the build instance `b`.
    pub fn dupe(self: RcSourceFile, b: *std.Build) RcSourceFile {
        const duped_file = self.file.dupe(b);
        const duped_flags = b.dupeStrings(self.flags);
        return RcSourceFile{
            .file = duped_file,
            .flags = duped_flags,
        };
    }
};
pub const LinkObject = union(enum) {
static_path: LazyPath,
other_step: *Compile,
@ -228,6 +256,7 @@ pub const LinkObject = union(enum) {
assembly_file: LazyPath,
c_source_file: *CSourceFile,
c_source_files: *CSourceFiles,
win32_resource_file: *RcSourceFile,
};
pub const SystemLib = struct {
@ -910,6 +939,18 @@ pub fn addCSourceFile(self: *Compile, source: CSourceFile) void {
source.file.addStepDependencies(&self.step);
}
/// Adds a Windows resource script (`.rc`) to this compile step.
/// The source description is duplicated, so the caller keeps ownership of `source`.
pub fn addWin32ResourceFile(self: *Compile, source: RcSourceFile) void {
    // Only the PE/COFF format has a Resource Table, so for any other target
    // the resource file is just ignored.
    switch (self.target.getObjectFormat()) {
        .coff => {},
        else => return,
    }

    const owner = self.step.owner;
    const duped_source = owner.allocator.create(RcSourceFile) catch @panic("OOM");
    duped_source.* = source.dupe(owner);
    self.link_objects.append(.{ .win32_resource_file = duped_source }) catch @panic("OOM");
    source.file.addStepDependencies(&self.step);
}
/// Sets the `verbose_link` field of this compile step to `value`.
pub fn setVerboseLink(self: *Compile, value: bool) void {
self.verbose_link = value;
}
@ -1358,6 +1399,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
try transitive_deps.add(self.link_objects.items);
var prev_has_cflags = false;
var prev_has_rcflags = false;
var prev_search_strategy: SystemLib.SearchStrategy = .paths_first;
var prev_preferred_link_mode: std.builtin.LinkMode = .Dynamic;
@ -1500,6 +1542,24 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
try zig_args.append(b.pathFromRoot(file));
}
},
.win32_resource_file => |rc_source_file| {
if (rc_source_file.flags.len == 0) {
if (prev_has_rcflags) {
try zig_args.append("-rcflags");
try zig_args.append("--");
prev_has_rcflags = false;
}
} else {
try zig_args.append("-rcflags");
for (rc_source_file.flags) |arg| {
try zig_args.append(arg);
}
try zig_args.append("--");
prev_has_rcflags = true;
}
try zig_args.append(rc_source_file.file.getPath(b));
},
}
}
@ -1897,6 +1957,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
}
}
if (self.rc_includes != .any) {
try zig_args.append("-rcincludes");
try zig_args.append(@tagName(self.rc_includes));
}
try addFlag(&zig_args, "valgrind", self.valgrind_support);
try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath);

View File

@ -421,7 +421,7 @@ pub const Wip = struct {
_ = try addExtra(wip, rt);
}
pub fn addBundle(wip: *Wip, other: ErrorBundle) !void {
pub fn addBundleAsNotes(wip: *Wip, other: ErrorBundle) !void {
const gpa = wip.gpa;
try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len);
@ -436,6 +436,21 @@ pub const Wip = struct {
}
}
/// Copies every message returned by `other.getMessages()` into `wip` and
/// appends each copy to `wip.root_list`, so the messages of `other` become
/// root messages of this bundle.
///
/// Capacity for the string bytes, extra data and root list is reserved up
/// front. Any allocation failure while copying a message is propagated to the
/// caller instead of being asserted away.
pub fn addBundleAsRoots(wip: *Wip, other: ErrorBundle) !void {
    const gpa = wip.gpa;

    try wip.string_bytes.ensureUnusedCapacity(gpa, other.string_bytes.len);
    try wip.extra.ensureUnusedCapacity(gpa, other.extra.len);

    const other_list = other.getMessages();
    try wip.root_list.ensureUnusedCapacity(gpa, other_list.len);

    for (other_list) |other_msg| {
        // Copying a message may still allocate (e.g. when strings are copied
        // more than once), so propagate failure rather than using `unreachable`.
        const copied_msg = try wip.addOtherMessage(other, other_msg);
        // The root list capacity reserved above guarantees room for this append.
        wip.root_list.appendAssumeCapacity(copied_msg);
    }
}
pub fn reserveNotes(wip: *Wip, notes_len: u32) !u32 {
try wip.extra.ensureUnusedCapacity(wip.gpa, notes_len +
notes_len * @typeInfo(ErrorBundle.ErrorMessage).Struct.fields.len);
@ -474,7 +489,10 @@ pub const Wip = struct {
.span_start = other_sl.span_start,
.span_main = other_sl.span_main,
.span_end = other_sl.span_end,
.source_line = try wip.addString(other.nullTerminatedString(other_sl.source_line)),
.source_line = if (other_sl.source_line != 0)
try wip.addString(other.nullTerminatedString(other_sl.source_line))
else
0,
.reference_trace_len = other_sl.reference_trace_len,
});

View File

@ -39,6 +39,7 @@ const libtsan = @import("libtsan.zig");
const Zir = @import("Zir.zig");
const Autodoc = @import("Autodoc.zig");
const Color = @import("main.zig").Color;
const resinator = @import("resinator.zig");
/// General-purpose allocator. Used for both temporary and long-term storage.
gpa: Allocator,
@ -46,6 +47,7 @@ gpa: Allocator,
arena_state: std.heap.ArenaAllocator.State,
bin_file: *link.File,
c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{},
win32_resource_table: std.AutoArrayHashMapUnmanaged(*Win32Resource, void) = .{},
/// This is a pointer to a local variable inside `update()`.
whole_cache_manifest: ?*Cache.Manifest = null,
whole_cache_manifest_mutex: std.Thread.Mutex = .{},
@ -60,6 +62,10 @@ anon_work_queue: std.fifo.LinearFifo(Job, .Dynamic),
/// gets linked with the Compilation.
c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
/// These jobs are to invoke the RC compiler to create a compiled resource file (.res), which
/// gets linked with the Compilation.
win32_resource_work_queue: std.fifo.LinearFifo(*Win32Resource, .Dynamic),
/// These jobs are to tokenize, parse, and astgen files, which may be outdated
/// since the last compilation, as well as scan for `@import` and queue up
/// additional jobs corresponding to those new files.
@ -73,6 +79,10 @@ embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic),
/// This data is accessed by multiple threads and is protected by `mutex`.
failed_c_objects: std.AutoArrayHashMapUnmanaged(*CObject, *CObject.ErrorMsg) = .{},
/// The ErrorBundle memory is owned by the `Win32Resource`, using Compilation's general purpose allocator.
/// This data is accessed by multiple threads and is protected by `mutex`.
failed_win32_resources: std.AutoArrayHashMapUnmanaged(*Win32Resource, ErrorBundle) = .{},
/// Miscellaneous things that can fail.
misc_failures: std.AutoArrayHashMapUnmanaged(MiscTask, MiscError) = .{},
@ -109,6 +119,7 @@ last_update_was_cache_hit: bool = false,
c_source_files: []const CSourceFile,
clang_argv: []const []const u8,
rc_source_files: []const RcSourceFile,
cache_parent: *Cache,
/// Path to own executable for invoking `zig clang`.
self_exe_path: ?[]const u8,
@ -125,6 +136,7 @@ local_cache_directory: Directory,
global_cache_directory: Directory,
libc_include_dir_list: []const []const u8,
libc_framework_dir_list: []const []const u8,
rc_include_dir_list: []const []const u8,
thread_pool: *ThreadPool,
/// Populated when we build the libc++ static library. A Job to build this is placed in the queue
@ -225,6 +237,23 @@ pub const CSourceFile = struct {
ext: ?FileExt = null,
};
/// For passing to resinator.
/// Describes one `.rc` source file together with its per-file options.
pub const RcSourceFile = struct {
/// Path of the `.rc` file. It is added to the cache manifest and handed to the
/// preprocessor as the input file.
src_path: []const u8,
/// Extra command line flags for this file. They are hashed into the cache
/// manifest and parsed by the resinator CLI parser.
extra_flags: []const []const u8 = &.{},
};
/// Selects which include directories are automatically detected for use when
/// preprocessing and compiling `.rc` files.
pub const RcIncludes = enum {
/// Use MSVC if available, fall back to MinGW.
any,
/// Use MSVC include paths (MSVC install + Windows SDK, must be present on the system).
msvc,
/// Use MinGW include paths (distributed with Zig).
gnu,
/// Do not use any autodetected include paths.
none,
};
const Job = union(enum) {
/// Write the constant value for a Decl to the output file.
codegen_decl: Module.Decl.Index,
@ -326,6 +355,50 @@ pub const CObject = struct {
}
};
/// Tracks one `.rc` source file and the state of its compiled `.res` output.
pub const Win32Resource = struct {
    /// Relative to cwd. Owned by arena.
    src: RcSourceFile,
    status: union(enum) {
        /// Not compiled yet, or reset by `clearStatus`.
        new,
        success: struct {
            /// The outputted result. Owned by gpa.
            res_path: []u8,
            /// This is a file system lock on the cache hash manifest representing this
            /// object. It prevents other invocations of the Zig compiler from interfering
            /// with this object until released.
            lock: Cache.Lock,
        },
        /// There will be a corresponding ErrorBundle in Compilation.failed_win32_resources.
        failure,
        /// A transient failure happened when trying to compile the resource file; it may
        /// succeed if we try again. There may be a corresponding ErrorBundle in
        /// Compilation.failed_win32_resources. If there is not, the failure is out of memory.
        failure_retryable,
    },

    /// Resets `status` to `.new`, releasing the result path and cache lock of a
    /// previous success. Returns true if the previous status was a failure.
    pub fn clearStatus(self: *Win32Resource, gpa: Allocator) bool {
        const had_failure = switch (self.status) {
            .new => return false,
            .failure, .failure_retryable => true,
            .success => |*success| released: {
                gpa.free(success.res_path);
                success.lock.release();
                break :released false;
            },
        };
        self.status = .new;
        return had_failure;
    }

    /// Releases everything held by the current status, then frees `self` with `gpa`.
    pub fn destroy(self: *Win32Resource, gpa: Allocator) void {
        _ = self.clearStatus(gpa);
        gpa.destroy(self);
    }
};
pub const MiscTask = enum {
write_builtin_zig,
glibc_crt_file,
@ -505,6 +578,8 @@ pub const InitOptions = struct {
rpath_list: []const []const u8 = &[0][]const u8{},
symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{},
c_source_files: []const CSourceFile = &[0]CSourceFile{},
rc_source_files: []const RcSourceFile = &[0]RcSourceFile{},
rc_includes: RcIncludes = .any,
link_objects: []LinkObject = &[0]LinkObject{},
framework_dirs: []const []const u8 = &[0][]const u8{},
frameworks: []const Framework = &.{},
@ -938,6 +1013,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
options.libc_installation,
);
const rc_dirs = try detectWin32ResourceIncludeDirs(
arena,
options,
);
const sysroot = options.sysroot orelse libc_dirs.sysroot;
const must_pie = target_util.requiresPIE(options.target);
@ -1591,16 +1671,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
.work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
.anon_work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
.c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
.win32_resource_work_queue = std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa),
.astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa),
.embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa),
.keep_source_files_loaded = options.keep_source_files_loaded,
.use_clang = use_clang,
.clang_argv = options.clang_argv,
.c_source_files = options.c_source_files,
.rc_source_files = options.rc_source_files,
.cache_parent = cache,
.self_exe_path = options.self_exe_path,
.libc_include_dir_list = libc_dirs.libc_include_dir_list,
.libc_framework_dir_list = libc_dirs.libc_framework_dir_list,
.rc_include_dir_list = rc_dirs.libc_include_dir_list,
.sanitize_c = sanitize_c,
.thread_pool = options.thread_pool,
.clang_passthrough_mode = options.clang_passthrough_mode,
@ -1647,6 +1730,19 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation {
comp.c_object_table.putAssumeCapacityNoClobber(c_object, {});
}
// Add a `Win32Resource` for each `rc_source_files`.
try comp.win32_resource_table.ensureTotalCapacity(gpa, options.rc_source_files.len);
for (options.rc_source_files) |rc_source_file| {
const win32_resource = try gpa.create(Win32Resource);
errdefer gpa.destroy(win32_resource);
win32_resource.* = .{
.status = .{ .new = {} },
.src = rc_source_file,
};
comp.win32_resource_table.putAssumeCapacityNoClobber(win32_resource, {});
}
const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null;
if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies and target.ofmt != .c) {
@ -1804,6 +1900,7 @@ pub fn destroy(self: *Compilation) void {
self.work_queue.deinit();
self.anon_work_queue.deinit();
self.c_object_work_queue.deinit();
self.win32_resource_work_queue.deinit();
self.astgen_work_queue.deinit();
self.embed_file_work_queue.deinit();
@ -1852,6 +1949,16 @@ pub fn destroy(self: *Compilation) void {
}
self.failed_c_objects.deinit(gpa);
for (self.win32_resource_table.keys()) |key| {
key.destroy(gpa);
}
self.win32_resource_table.deinit(gpa);
for (self.failed_win32_resources.values()) |*value| {
value.deinit(gpa);
}
self.failed_win32_resources.deinit(gpa);
for (self.lld_errors.items) |*lld_error| {
lld_error.deinit(gpa);
}
@ -2014,6 +2121,13 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void
comp.c_object_work_queue.writeItemAssumeCapacity(key);
}
// For compiling Win32 resources, we rely on the cache hash system to avoid duplicating work.
// Add a Job for each Win32 resource file.
try comp.win32_resource_work_queue.ensureUnusedCapacity(comp.win32_resource_table.count());
for (comp.win32_resource_table.keys()) |key| {
comp.win32_resource_work_queue.writeItemAssumeCapacity(key);
}
if (comp.bin_file.options.module) |module| {
module.compile_log_text.shrinkAndFree(module.gpa, 0);
module.generation += 1;
@ -2336,6 +2450,13 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes
man.hash.addListOfBytes(key.src.extra_flags);
}
for (comp.win32_resource_table.keys()) |key| {
_ = try man.addFile(key.src.src_path, null);
man.hash.addListOfBytes(key.src.extra_flags);
}
man.hash.addListOfBytes(comp.rc_include_dir_list);
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_asm);
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_ir);
cache_helpers.addOptionalEmitLoc(&man.hash, comp.emit_llvm_bc);
@ -2571,8 +2692,14 @@ pub fn makeBinFileWritable(self: *Compilation) !void {
/// This function is temporally single-threaded.
pub fn totalErrorCount(self: *Compilation) u32 {
var total: usize = self.failed_c_objects.count() + self.misc_failures.count() +
@intFromBool(self.alloc_failure_occurred) + self.lld_errors.items.len;
var total: usize = self.failed_c_objects.count() +
self.misc_failures.count() +
@intFromBool(self.alloc_failure_occurred) +
self.lld_errors.items.len;
for (self.failed_win32_resources.values()) |errs| {
total += errs.errorMessageCount();
}
if (self.bin_file.options.module) |module| {
total += module.failed_exports.count();
@ -2664,6 +2791,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle {
}
}
{
var it = self.failed_win32_resources.iterator();
while (it.next()) |entry| {
try bundle.addBundleAsRoots(entry.value_ptr.*);
}
}
for (self.lld_errors.items) |lld_error| {
const notes_len = @as(u32, @intCast(lld_error.context_lines.len));
@ -2683,7 +2817,7 @@ pub fn getAllErrorsAlloc(self: *Compilation) !ErrorBundle {
.msg = try bundle.addString(value.msg),
.notes_len = if (value.children) |b| b.errorMessageCount() else 0,
});
if (value.children) |b| try bundle.addBundle(b);
if (value.children) |b| try bundle.addBundleAsNotes(b);
}
if (self.alloc_failure_occurred) {
try bundle.addRootErrorMessage(.{
@ -3082,6 +3216,9 @@ pub fn performAllTheWork(
var c_obj_prog_node = main_progress_node.start("Compile C Objects", comp.c_source_files.len);
defer c_obj_prog_node.end();
var win32_resource_prog_node = main_progress_node.start("Compile Win32 Resources", comp.rc_source_files.len);
defer win32_resource_prog_node.end();
var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", comp.embed_file_work_queue.count);
defer embed_file_prog_node.end();
@ -3130,6 +3267,13 @@ pub fn performAllTheWork(
comp, c_object, &c_obj_prog_node, &comp.work_queue_wait_group,
});
}
while (comp.win32_resource_work_queue.readItem()) |win32_resource| {
comp.work_queue_wait_group.start();
try comp.thread_pool.spawn(workerUpdateWin32Resource, .{
comp, win32_resource, &win32_resource_prog_node, &comp.work_queue_wait_group,
});
}
}
if (comp.bin_file.options.module) |mod| {
@ -3659,6 +3803,14 @@ pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest {
return man;
}
/// Obtains a new manifest from `cache_parent` and hashes the `.rc` include
/// directory list into it, so a change of include directories changes the digest.
pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest {
    var manifest = comp.cache_parent.obtain();
    manifest.hash.addListOfBytes(comp.rc_include_dir_list);
    return manifest;
}
test "cImport" {
_ = cImport;
}
@ -3832,6 +3984,26 @@ fn workerUpdateCObject(
};
}
/// Thread pool entry point: compiles one Win32 resource and always signals
/// `wg` when done. Errors are recorded on the resource rather than returned.
fn workerUpdateWin32Resource(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    progress_node: *std.Progress.Node,
    wg: *WaitGroup,
) void {
    defer wg.finish();

    comp.updateWin32Resource(win32_resource, progress_node) catch |err| {
        // An analysis failure has already been reported; nothing more to do.
        if (err == error.AnalysisFail) return;

        comp.reportRetryableWin32ResourceError(win32_resource, err) catch |oom| switch (oom) {
            // Swallowing this error is OK because it's implied to be OOM when
            // there is a missing failed_win32_resources error message.
            error.OutOfMemory => {},
        };
    };
}
fn buildCompilerRtOneShot(
comp: *Compilation,
output_mode: std.builtin.OutputMode,
@ -3877,6 +4049,18 @@ fn reportRetryableCObjectError(
}
}
/// Marks `win32_resource` as failed with a retryable error and records an
/// error message for it in `comp.failed_win32_resources`.
///
/// The status is updated before any allocation is attempted, so if this
/// function returns `error.OutOfMemory` the resource is still marked
/// `.failure_retryable`, just without an accompanying error message.
fn reportRetryableWin32ResourceError(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    err: anyerror,
) error{OutOfMemory}!void {
    win32_resource.status = .failure_retryable;

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    errdefer bundle.deinit();

    try bundle.addRootErrorMessage(.{
        .msg = try bundle.printString("unable to compile Win32 resource: {s}", .{@errorName(err)}),
        .src_loc = try bundle.addSourceLocation(.{
            .src_path = try bundle.addString(win32_resource.src.src_path),
            .line = 0,
            .column = 0,
            .span_start = 0,
            .span_main = 0,
            .span_end = 0,
        }),
    });

    var finished_bundle = try bundle.toOwnedBundle("");
    // The table takes ownership only if the insertion below succeeds.
    errdefer finished_bundle.deinit(comp.gpa);

    // `failed_win32_resources` is shared between worker threads.
    comp.mutex.lock();
    defer comp.mutex.unlock();
    try comp.failed_win32_resources.putNoClobber(comp.gpa, win32_resource, finished_bundle);
}
fn reportRetryableAstGenError(
comp: *Compilation,
src: AstGenSrc,
@ -4233,6 +4417,311 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P
};
}
/// Compiles the `.rc` file described by `win32_resource` into a `.res` file in
/// the local cache directory and stores the outcome in `win32_resource.status`.
///
/// On a cache miss this:
/// 1. parses `src.extra_flags` with the resinator CLI parser,
/// 2. runs `zig clang -E` to preprocess the `.rc` file into a temporary `.rcpp` file,
/// 3. removes line commands and comments, then runs the resinator compiler to
///    write a temporary `.res` file,
/// 4. registers the discovered dependencies with the cache manifest(s) and
///    renames both outputs into `o/<digest>/`.
///
/// Failures reported via `failWin32Resource` end in `error.AnalysisFail`; any
/// other error is returned to the caller as-is.
fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void {
    if (!build_options.have_llvm) {
        return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{});
    }
    const self_exe_path = comp.self_exe_path orelse
        return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{});

    const tracy_trace = trace(@src());
    defer tracy_trace.end();

    log.debug("updating win32 resource: {s}", .{win32_resource.src.src_path});

    if (win32_resource.clearStatus(comp.gpa)) {
        // There was previous failure.
        comp.mutex.lock();
        defer comp.mutex.unlock();
        // If the failure was OOM, there will not be an entry here, so we do
        // not assert discard.
        _ = comp.failed_win32_resources.swapRemove(win32_resource);
    }

    var man = comp.obtainWin32ResourceCacheManifest();
    defer man.deinit();

    _ = try man.addFile(win32_resource.src.src_path, null);
    man.hash.addListOfBytes(win32_resource.src.extra_flags);

    var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    const rc_basename = std.fs.path.basename(win32_resource.src.src_path);

    win32_resource_prog_node.activate();
    var child_progress_node = win32_resource_prog_node.start(rc_basename, 0);
    child_progress_node.activate();
    defer child_progress_node.end();

    const rc_basename_noext = rc_basename[0 .. rc_basename.len - std.fs.path.extension(rc_basename).len];

    const digest = if (try man.hit()) man.final() else blk: {
        const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext});

        const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename);
        var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{});
        defer zig_cache_tmp_dir.close();

        const res_filename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext});

        // We can't know the digest until we do the compilation,
        // so we need a temporary filename.
        const out_res_path = try comp.tmpFilePath(arena, res_filename);

        var options = options: {
            // 1 dummy process name + the user flags + "--" + input path + output path.
            var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, win32_resource.src.extra_flags.len + 4);
            defer resinator_args.deinit(comp.gpa);

            resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg
            resinator_args.appendSliceAssumeCapacity(win32_resource.src.extra_flags);
            resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path });

            var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa);
            defer cli_diagnostics.deinit();
            var options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) {
                error.ParseError => {
                    return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics);
                },
                else => |e| return e,
            };
            break :options options;
        };
        defer options.deinit();

        var argv = std.ArrayList([]const u8).init(comp.gpa);
        defer argv.deinit();
        // Owns the strings formatted for `-D` arguments until argv is no longer needed.
        var temp_strings = std.ArrayList([]const u8).init(comp.gpa);
        defer {
            for (temp_strings.items) |temp_string| {
                comp.gpa.free(temp_string);
            }
            temp_strings.deinit();
        }

        // TODO: support options.preprocess == .no and .only
        //       alternatively, error if those options are used
        try argv.appendSlice(&[_][]const u8{
            self_exe_path,
            "clang",
            "-E", // preprocessor only
            "--comments",
            "-fuse-line-directives", // #line <num> instead of # <num>
            "-xc", // output c
            "-Werror=null-character", // error on null characters instead of converting them to spaces
            "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things
            "-DRC_INVOKED", // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros
        });
        // Using -fms-compatibility and targeting the gnu abi interact in a strange way:
        // - Targeting the GNU abi stops _MSC_VER from being defined
        // - Passing -fms-compatibility stops __GNUC__ from being defined
        // Neither being defined is a problem for things like MinGW's
        // vadefs.h, which will fail during preprocessing if neither are defined.
        // So, when targeting the GNU abi, we need to force __GNUC__ to be defined.
        //
        // TODO: This is a workaround that should be removed if possible.
        if (comp.getTarget().isGnu()) {
            // This is the same default gnuc version that Clang uses:
            // https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738
            try argv.append("-fgnuc-version=4.2.1");
        }
        for (options.extra_include_paths.items) |extra_include_path| {
            try argv.append("--include-directory");
            try argv.append(extra_include_path);
        }
        var symbol_it = options.symbols.iterator();
        while (symbol_it.next()) |entry| {
            switch (entry.value_ptr.*) {
                .define => |value| {
                    try argv.append("-D");
                    const define_arg = arg: {
                        const arg = try std.fmt.allocPrint(comp.gpa, "{s}={s}", .{ entry.key_ptr.*, value });
                        errdefer comp.gpa.free(arg);
                        try temp_strings.append(arg);
                        break :arg arg;
                    };
                    try argv.append(define_arg);
                },
                .undefine => {
                    try argv.append("-U");
                    try argv.append(entry.key_ptr.*);
                },
            }
        }
        try argv.append(win32_resource.src.src_path);
        try argv.appendSlice(&[_][]const u8{
            "-o",
            out_rcpp_path,
        });

        const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path});
        // Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization
        // mode. While these defines are not normally present when calling rc.exe directly,
        // them being defined matches the behavior of how MSVC calls rc.exe which is the more
        // relevant behavior in this case.
        try comp.addCCArgs(arena, &argv, .rc, out_dep_path);

        if (comp.verbose_cc) {
            dump_argv(argv.items);
        }

        if (std.process.can_spawn) {
            var child = std.ChildProcess.init(argv.items, arena);
            child.stdin_behavior = .Ignore;
            child.stdout_behavior = .Ignore;
            child.stderr_behavior = .Pipe;

            try child.spawn();

            const stderr_reader = child.stderr.?.reader();

            const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024);

            const term = child.wait() catch |err| {
                return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
            };

            switch (term) {
                .Exited => |code| {
                    if (code != 0) {
                        // TODO parse clang stderr and turn it into an error message
                        // and then call failCObjWithOwnedErrorMsg
                        log.err("clang preprocessor failed with stderr:\n{s}", .{stderr});
                        return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code});
                    }
                },
                else => {
                    log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr});
                    return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{});
                },
            }
        } else {
            const exit_code = try clangMain(arena, argv.items);
            if (exit_code != 0) {
                return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code});
            }
        }

        const dep_basename = std.fs.path.basename(out_dep_path);
        // Add the files depended on to the cache system.
        try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
        if (comp.whole_cache_manifest) |whole_cache_manifest| {
            comp.whole_cache_manifest_mutex.lock();
            defer comp.whole_cache_manifest_mutex.unlock();
            try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
        }
        // Just to save disk space, we delete the file because it is never needed again.
        zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
            log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) });
        };

        var full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            else => |e| {
                return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) });
            },
        };

        // Both passes below are given the same buffer as input and output.
        var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = win32_resource.src.src_path });
        defer mapping_results.mappings.deinit(arena);

        var final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings);

        var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| {
            return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) });
        };
        // The error path below closes the file early so that it can be deleted.
        var output_file_closed = false;
        defer if (!output_file_closed) output_file.close();

        var diagnostics = resinator.errors.Diagnostics.init(arena);
        defer diagnostics.deinit();

        var dependencies_list = std.ArrayList([]const u8).init(comp.gpa);
        defer {
            for (dependencies_list.items) |item| {
                comp.gpa.free(item);
            }
            dependencies_list.deinit();
        }

        var output_buffered_stream = std.io.bufferedWriter(output_file.writer());

        resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{
            .cwd = std.fs.cwd(),
            .diagnostics = &diagnostics,
            .source_mappings = &mapping_results.mappings,
            .dependencies_list = &dependencies_list,
            .system_include_paths = comp.rc_include_dir_list,
            .ignore_include_env_var = true,
            // options
            .extra_include_paths = options.extra_include_paths.items,
            .default_language_id = options.default_language_id,
            .default_code_page = options.default_code_page orelse .windows1252,
            .verbose = options.verbose,
            .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
            .max_string_literal_codepoints = options.max_string_literal_codepoints,
            .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
            .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
        }) catch |err| switch (err) {
            error.ParseError, error.CompileError => {
                // Delete the output file on error
                output_file.close();
                output_file_closed = true;
                // Failing to delete is not really a big deal, so only warn about it.
                // Report the deletion error itself, not the compile error being handled.
                zig_cache_tmp_dir.deleteFile(out_res_path) catch |delete_err| {
                    log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(delete_err) });
                };
                return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings);
            },
            else => |e| return e,
        };

        try output_buffered_stream.flush();

        for (dependencies_list.items) |dep_file_path| {
            try man.addFilePost(dep_file_path);
            if (comp.whole_cache_manifest) |whole_cache_manifest| {
                comp.whole_cache_manifest_mutex.lock();
                defer comp.whole_cache_manifest_mutex.unlock();
                try whole_cache_manifest.addFilePost(dep_file_path);
            }
        }

        // Rename into place.
        const digest = man.final();
        const o_sub_path = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest });
        var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{});
        defer o_dir.close();
        const tmp_basename = std.fs.path.basename(out_res_path);
        try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename);
        const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path);
        try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename);
        break :blk digest;
    };

    if (man.have_exclusive_lock) {
        // Write the updated manifest. This is a no-op if the manifest is not dirty. Note that it is
        // possible we had a hit and the manifest is dirty, for example if the file mtime changed but
        // the contents were the same, we hit the cache but the manifest is dirty and we need to update
        // it to prevent doing a full file content comparison the next time around.
        man.writeManifest() catch |err| {
            log.warn("failed to write cache manifest when compiling '{s}': {s}", .{ win32_resource.src.src_path, @errorName(err) });
        };
    }

    const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{rc_basename_noext});

    win32_resource.status = .{
        .success = .{
            .res_path = try comp.local_cache_directory.join(comp.gpa, &[_][]const u8{
                "o", &digest, res_basename,
            }),
            .lock = man.toOwnedLock(),
        },
    };
}
pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 {
const s = std.fs.path.sep_str;
const rand_int = std.crypto.random.int(u64);
@ -4350,7 +4839,7 @@ pub fn addCCArgs(
try argv.appendSlice(&[_][]const u8{ "-target", llvm_triple });
switch (ext) {
.c, .cpp, .m, .mm, .h, .cu => {
.c, .cpp, .m, .mm, .h, .cu, .rc => {
try argv.appendSlice(&[_][]const u8{
"-nostdinc",
"-fno-spell-checking",
@ -4378,9 +4867,16 @@ pub fn addCCArgs(
try argv.append("-isystem");
try argv.append(c_headers_dir);
for (comp.libc_include_dir_list) |include_dir| {
try argv.append("-isystem");
try argv.append(include_dir);
if (ext == .rc) {
for (comp.rc_include_dir_list) |include_dir| {
try argv.append("-isystem");
try argv.append(include_dir);
}
} else {
for (comp.libc_include_dir_list) |include_dir| {
try argv.append("-isystem");
try argv.append(include_dir);
}
}
if (target.cpu.model.llvm_name) |llvm_name| {
@ -4692,6 +5188,253 @@ fn failCObjWithOwnedErrorMsg(
return error.AnalysisFail;
}
/// Determines the include directories to use when preprocessing `.rc` files.
/// These are chosen independently of the compilation target, according to
/// `options.rc_includes`. When there are no `.rc` files, no directories are used.
///
/// Note: It should be okay that the include directories used when compiling .rc
/// files differ from the include directories used when compiling the main
/// binary, since the .res format is not dependent on anything ABI-related. The
/// only relevant differences would be things like `#define` constants being
/// different in the MinGW headers vs the MSVC headers, but any such
/// differences would likely be a MinGW bug.
fn detectWin32ResourceIncludeDirs(arena: Allocator, options: InitOptions) !LibCDirs {
    // Without any rc files to compile there is nothing to detect.
    var includes: RcIncludes = if (options.rc_source_files.len == 0) .none else options.rc_includes;

    if (builtin.target.os.tag != .windows) {
        // MSVC can't be found when the host isn't Windows, so short-circuit.
        if (includes == .msvc) return error.WindowsSdkNotFound;
        // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
        if (includes == .any) includes = .gnu;
    }

    if (includes == .any or includes == .msvc) {
        if (detectLibCIncludeDirs(
            arena,
            options.zig_lib_directory.path.?,
            .{
                .cpu = options.target.cpu,
                .os = options.target.os,
                .abi = .msvc,
                .ofmt = options.target.ofmt,
            },
            options.is_native_abi,
            // The .rc preprocessor will need to know the libc include dirs even if we
            // are not linking libc, so force 'link_libc' to true
            true,
            options.libc_installation,
        )) |msvc_dirs| {
            return msvc_dirs;
        } else |err| {
            // Only `.any` is allowed to fall back to mingw.
            if (includes != .any) return err;
            includes = .gnu;
        }
    }

    switch (includes) {
        .gnu => return detectLibCFromBuilding(arena, options.zig_lib_directory.path.?, .{
            .cpu = options.target.cpu,
            .os = options.target.os,
            .abi = .gnu,
            .ofmt = options.target.ofmt,
        }),
        .none => return LibCDirs{
            .libc_include_dir_list = &[0][]u8{},
            .libc_installation = null,
            .libc_framework_dir_list = &.{},
            .sysroot = null,
        },
        // Both were either returned from or converted to `.gnu` above.
        .any, .msvc => unreachable,
    }
}
/// Builds a single-message error bundle for `win32_resource` from a format
/// string and registers it as the failure for that resource.
///
/// The message is attributed to the resource's source path at line 0, column 0.
/// Always returns an error: `error.AnalysisFail` once the failure is recorded,
/// or whatever error occurred while building the bundle.
fn failWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, comptime format: []const u8, args: anytype) SemaError {
    @setCold(true);

    var wip: ErrorBundle.Wip = undefined;
    try wip.init(comp.gpa);
    // This function only ever returns errors, so this also runs after
    // `toOwnedBundle` has handed the finished bundle off.
    errdefer wip.deinit();

    const rendered_msg = try wip.printString(format, args);
    const rc_path = try wip.addString(win32_resource.src.src_path);
    const location = try wip.addSourceLocation(.{
        .src_path = rc_path,
        .line = 0,
        .column = 0,
        .span_start = 0,
        .span_main = 0,
        .span_end = 0,
    });
    try wip.addRootErrorMessage(.{
        .msg = rendered_msg,
        .src_loc = location,
    });

    const owned = try wip.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, owned);
}
/// Records `err_bundle` as the failure for `win32_resource` and marks the
/// resource as failed.
///
/// Takes ownership of `err_bundle`: on success it is stored in
/// `comp.failed_win32_resources`; if the table cannot grow, the bundle is freed
/// here so it does not leak on the out-of-memory path.
///
/// Always returns an error: `error.AnalysisFail` after the failure has been
/// recorded, or the allocation error if it could not be recorded.
fn failWin32ResourceWithOwnedBundle(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    err_bundle: ErrorBundle,
) SemaError {
    @setCold(true);
    {
        comp.mutex.lock();
        defer comp.mutex.unlock();
        // Reserve the slot first so that the only fallible step happens while
        // we can still release the bundle we own.
        comp.failed_win32_resources.ensureUnusedCapacity(comp.gpa, 1) catch |err| {
            var doomed_bundle = err_bundle;
            doomed_bundle.deinit(comp.gpa);
            return err;
        };
        comp.failed_win32_resources.putAssumeCapacityNoClobber(win32_resource, err_bundle);
    }
    win32_resource.status = .failure;
    return error.AnalysisFail;
}
/// Converts the diagnostics produced while parsing the resource compiler's
/// command line into an error bundle and registers it as the failure for
/// `win32_resource`.
///
/// The bundle starts with a generic "invalid command line option(s)" root
/// message attributed to the resource's source path. Each `.err` diagnostic is
/// then added as its own root message, with the `.note` diagnostics that
/// directly follow it attached as notes. `.warning` diagnostics are not
/// reported; they only end the current error's run of notes.
///
/// Always returns an error (`error.AnalysisFail` once the failure is recorded).
fn failWin32ResourceCli(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    diagnostics: *resinator.cli.Diagnostics,
) SemaError {
    @setCold(true);

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    errdefer bundle.deinit();

    try bundle.addRootErrorMessage(.{
        .msg = try bundle.addString("invalid command line option(s)"),
        .src_loc = try bundle.addSourceLocation(.{
            .src_path = try bundle.addString(win32_resource.src.src_path),
            .line = 0,
            .column = 0,
            .span_start = 0,
            .span_main = 0,
            .span_end = 0,
        }),
    });

    // The error currently collecting notes, if any. It is written to the bundle
    // ("flushed") only once its notes are complete.
    var cur_err: ?ErrorBundle.ErrorMessage = null;
    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
    defer cur_notes.deinit(comp.gpa);
    for (diagnostics.errors.items) |err_details| {
        switch (err_details.type) {
            .err => {
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = .{
                    .msg = try bundle.addString(err_details.msg.items),
                };
                cur_notes.clearRetainingCapacity();
            },
            .warning => {
                // A warning ends the current error's notes. Flush the pending
                // error before clearing it; otherwise an error followed by a
                // warning would silently disappear from the bundle.
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = null;
            },
            .note => {
                // Notes that do not follow an error (e.g. notes on a warning)
                // are dropped.
                if (cur_err) |*err| {
                    err.notes_len += 1;
                    try cur_notes.append(comp.gpa, .{
                        .msg = try bundle.addString(err_details.msg.items),
                    });
                }
            },
        }
    }
    if (cur_err) |err| {
        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
    }

    const finished_bundle = try bundle.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
}
/// Converts the diagnostics produced while compiling a `.rc` file into an
/// error bundle and registers it as the failure for `win32_resource`.
///
/// `source` is the text the diagnostics' tokens refer to, and `mappings` is
/// used to translate a token's line number back to the originating file and
/// line. Each `.err` diagnostic becomes a root message, with the `.note`
/// diagnostics that directly follow it attached as notes. `.hint` and
/// `.warning` diagnostics are not reported.
///
/// Always returns an error (`error.AnalysisFail` once the failure is recorded).
fn failWin32ResourceCompile(
    comp: *Compilation,
    win32_resource: *Win32Resource,
    source: []const u8,
    diagnostics: *resinator.errors.Diagnostics,
    mappings: resinator.source_mapping.SourceMappings,
) SemaError {
    @setCold(true);

    var bundle: ErrorBundle.Wip = undefined;
    try bundle.init(comp.gpa);
    errdefer bundle.deinit();

    // Scratch buffer reused for rendering each diagnostic's message.
    var msg_buf: std.ArrayListUnmanaged(u8) = .{};
    defer msg_buf.deinit(comp.gpa);

    // The error currently collecting notes, if any. It is written to the bundle
    // ("flushed") only once its notes are complete.
    var cur_err: ?ErrorBundle.ErrorMessage = null;
    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
    defer cur_notes.deinit(comp.gpa);
    for (diagnostics.errors.items) |err_details| {
        switch (err_details.type) {
            .hint => continue,
            // Clear the current error so that notes don't bleed into unassociated errors
            .warning => {
                // Flush the pending error first; clearing it without flushing
                // would drop an error that happens to precede a warning.
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = null;
                continue;
            },
            .note => if (cur_err == null) continue,
            .err => {},
        }

        const corresponding_span = mappings.get(err_details.token.line_number);
        const corresponding_file = mappings.files.get(corresponding_span.filename_offset);

        const source_line_start = err_details.token.getLineStart(source);
        const column = err_details.token.calculateColumn(source, 1, source_line_start);
        const err_line = corresponding_span.start_line;

        msg_buf.clearRetainingCapacity();
        try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items);

        const src_loc = src_loc: {
            var src_loc: ErrorBundle.SourceLocation = .{
                .src_path = try bundle.addString(corresponding_file),
                .line = @intCast(err_line - 1), // 1-based -> 0-based
                .column = @intCast(column),
                .span_start = 0,
                .span_main = 0,
                .span_end = 0,
            };
            if (err_details.print_source_line) {
                const source_line = err_details.token.getLine(source, source_line_start);
                const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
                src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len);
                src_loc.span_main = @intCast(visual_info.point_offset);
                src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len);
                src_loc.source_line = try bundle.addString(source_line);
            }
            break :src_loc try bundle.addSourceLocation(src_loc);
        };

        switch (err_details.type) {
            .err => {
                if (cur_err) |err| {
                    try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
                }
                cur_err = .{
                    .msg = try bundle.addString(msg_buf.items),
                    .src_loc = src_loc,
                };
                cur_notes.clearRetainingCapacity();
            },
            .note => {
                // Non-null here: notes without a current error were skipped above.
                cur_err.?.notes_len += 1;
                try cur_notes.append(comp.gpa, .{
                    .msg = try bundle.addString(msg_buf.items),
                    .src_loc = src_loc,
                });
            },
            // Both were handled (via `continue`) by the first switch.
            .warning, .hint => unreachable,
        }
    }
    if (cur_err) |err| {
        try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
    }

    const finished_bundle = try bundle.toOwnedBundle("");
    return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
}
/// Writes `msg` to `wip` as a root error message and attaches `notes` to it.
///
/// Space for the notes is reserved with `reserveNotes`, then each reserved slot
/// is filled with the index of the corresponding newly added note message.
fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void {
    try wip.addRootErrorMessage(msg);

    const first_slot = try wip.reserveNotes(@intCast(notes.len));
    for (notes, first_slot..) |note, slot| {
        const note_index = wip.addErrorMessageAssumeCapacity(note);
        wip.extra.items[slot] = @intFromEnum(note_index);
    }
}
pub const FileExt = enum {
c,
cpp,
@ -4708,6 +5451,7 @@ pub const FileExt = enum {
static_library,
zig,
def,
rc,
res,
unknown,
@ -4724,6 +5468,7 @@ pub const FileExt = enum {
.static_library,
.zig,
.def,
.rc,
.res,
.unknown,
=> false,
@ -4747,6 +5492,7 @@ pub const FileExt = enum {
.static_library => target.staticLibSuffix(),
.zig => ".zig",
.def => ".def",
.rc => ".rc",
.res => ".res",
.unknown => "",
};
@ -4839,7 +5585,9 @@ pub fn classifyFileExt(filename: []const u8) FileExt {
return .cu;
} else if (mem.endsWith(u8, filename, ".def")) {
return .def;
} else if (mem.endsWith(u8, filename, ".res")) {
} else if (std.ascii.endsWithIgnoreCase(filename, ".rc")) {
return .rc;
} else if (std.ascii.endsWithIgnoreCase(filename, ".res")) {
return .res;
} else {
return .unknown;
@ -4983,6 +5731,13 @@ fn detectLibCFromLibCInstallation(arena: Allocator, target: Target, lci: *const
if (!is_redundant) list.appendAssumeCapacity(lci.sys_include_dir.?);
if (target.os.tag == .windows) {
if (std.fs.path.dirname(lci.sys_include_dir.?)) |sys_include_dir_parent| {
// This include path will only exist when the optional "Desktop development with C++"
// is installed. It contains headers, .rc files, and resources. It is especially
// necessary when working with Windows resources.
const atlmfc_dir = try std.fs.path.join(arena, &[_][]const u8{ sys_include_dir_parent, "atlmfc", "include" });
list.appendAssumeCapacity(atlmfc_dir);
}
if (std.fs.path.dirname(lci.include_dir.?)) |include_dir_parent| {
const um_dir = try std.fs.path.join(arena, &[_][]const u8{ include_dir_parent, "um" });
list.appendAssumeCapacity(um_dir);

View File

@ -1027,6 +1027,9 @@ pub const File = struct {
for (comp.c_object_table.keys()) |key| {
_ = try man.addFile(key.status.success.object_path, null);
}
for (comp.win32_resource_table.keys()) |key| {
_ = try man.addFile(key.status.success.res_path, null);
}
try man.addOptionalFile(module_obj_path);
try man.addOptionalFile(compiler_rt_path);
@ -1056,7 +1059,7 @@ pub const File = struct {
};
}
const num_object_files = base.options.objects.len + comp.c_object_table.count() + 2;
const num_object_files = base.options.objects.len + comp.c_object_table.count() + comp.win32_resource_table.count() + 2;
var object_files = try std.ArrayList([*:0]const u8).initCapacity(base.allocator, num_object_files);
defer object_files.deinit();
@ -1066,6 +1069,9 @@ pub const File = struct {
for (comp.c_object_table.keys()) |key| {
object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.object_path));
}
for (comp.win32_resource_table.keys()) |key| {
object_files.appendAssumeCapacity(try arena.dupeZ(u8, key.status.success.res_path));
}
if (module_obj_path) |p| {
object_files.appendAssumeCapacity(try arena.dupeZ(u8, p));
}

View File

@ -72,6 +72,9 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
for (comp.c_object_table.keys()) |key| {
_ = try man.addFile(key.status.success.object_path, null);
}
for (comp.win32_resource_table.keys()) |key| {
_ = try man.addFile(key.status.success.res_path, null);
}
try man.addOptionalFile(module_obj_path);
man.hash.addOptionalBytes(self.base.options.entry);
man.hash.addOptional(self.base.options.stack_size_override);
@ -268,6 +271,10 @@ pub fn linkWithLLD(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
try argv.append(key.status.success.object_path);
}
for (comp.win32_resource_table.keys()) |key| {
try argv.append(key.status.success.res_path);
}
if (module_obj_path) |p| {
try argv.append(p);
}

View File

@ -472,6 +472,12 @@ const usage_build_generic =
\\ -D[macro]=[value] Define C [macro] to [value] (1 if [value] omitted)
\\ --libc [file] Provide a file which specifies libc paths
\\ -cflags [flags] -- Set extra flags for the next positional C source files
\\ -rcflags [flags] -- Set extra flags for the next positional .rc source files
\\ -rcincludes=[type] Set the type of includes to use when compiling .rc source files
\\ any (default) Use msvc if available, fall back to gnu
\\ msvc Use msvc include paths (must be present on the system)
\\ gnu Use mingw include paths (distributed with Zig)
\\ none Do not use any autodetected include paths
\\
\\Link Options:
\\ -l[lib], --library [lib] Link against system library (only if actually used)
@ -919,11 +925,15 @@ fn buildOutputType(
var wasi_emulated_libs = std.ArrayList(wasi_libc.CRTFile).init(arena);
var clang_argv = std.ArrayList([]const u8).init(arena);
var extra_cflags = std.ArrayList([]const u8).init(arena);
var extra_rcflags = std.ArrayList([]const u8).init(arena);
// These are before resolving sysroot.
var lib_dir_args = std.ArrayList([]const u8).init(arena);
var rpath_list = std.ArrayList([]const u8).init(arena);
var symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .{};
var c_source_files = std.ArrayList(Compilation.CSourceFile).init(arena);
var rc_source_files = std.ArrayList(Compilation.RcSourceFile).init(arena);
var rc_includes: Compilation.RcIncludes = .any;
var res_files = std.ArrayList(Compilation.LinkObject).init(arena);
var link_objects = std.ArrayList(Compilation.LinkObject).init(arena);
var framework_dirs = std.ArrayList([]const u8).init(arena);
var frameworks: std.StringArrayHashMapUnmanaged(Framework) = .{};
@ -1042,6 +1052,19 @@ fn buildOutputType(
if (mem.eql(u8, next_arg, "--")) break;
try extra_cflags.append(next_arg);
}
} else if (mem.eql(u8, arg, "-rcincludes")) {
rc_includes = parseRcIncludes(args_iter.nextOrFatal());
} else if (mem.startsWith(u8, arg, "-rcincludes=")) {
rc_includes = parseRcIncludes(arg["-rcincludes=".len..]);
} else if (mem.eql(u8, arg, "-rcflags")) {
extra_rcflags.shrinkRetainingCapacity(0);
while (true) {
const next_arg = args_iter.next() orelse {
fatal("expected -- after -rcflags", .{});
};
if (mem.eql(u8, next_arg, "--")) break;
try extra_rcflags.append(next_arg);
}
} else if (mem.eql(u8, arg, "--color")) {
const next_arg = args_iter.next() orelse {
fatal("expected [auto|on|off] after --color", .{});
@ -1590,7 +1613,8 @@ fn buildOutputType(
}
} else switch (file_ext orelse
Compilation.classifyFileExt(arg)) {
.object, .static_library, .shared_library, .res => try link_objects.append(.{ .path = arg }),
.object, .static_library, .shared_library => try link_objects.append(.{ .path = arg }),
.res => try res_files.append(.{ .path = arg }),
.assembly, .assembly_with_cpp, .c, .cpp, .h, .ll, .bc, .m, .mm, .cu => {
try c_source_files.append(.{
.src_path = arg,
@ -1599,6 +1623,12 @@ fn buildOutputType(
.ext = file_ext,
});
},
.rc => {
try rc_source_files.append(.{
.src_path = arg,
.extra_flags = try arena.dupe([]const u8, extra_rcflags.items),
});
},
.zig => {
if (root_src_file) |other| {
fatal("found another zig file '{s}' after root source file '{s}'", .{ arg, other });
@ -1684,13 +1714,20 @@ fn buildOutputType(
.ext = file_ext, // duped while parsing the args.
});
},
.unknown, .shared_library, .object, .static_library, .res => try link_objects.append(.{
.unknown, .shared_library, .object, .static_library => try link_objects.append(.{
.path = it.only_arg,
.must_link = must_link,
}),
.res => try res_files.append(.{
.path = it.only_arg,
.must_link = must_link,
}),
.def => {
linker_module_definition_file = it.only_arg;
},
.rc => {
try rc_source_files.append(.{ .src_path = it.only_arg });
},
.zig => {
if (root_src_file) |other| {
fatal("found another zig file '{s}' after root source file '{s}'", .{ it.only_arg, other });
@ -2452,6 +2489,12 @@ fn buildOutputType(
} else if (emit_bin == .yes) {
const basename = fs.path.basename(emit_bin.yes);
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
} else if (rc_source_files.items.len >= 1) {
const basename = fs.path.basename(rc_source_files.items[0].src_path);
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
} else if (res_files.items.len >= 1) {
const basename = fs.path.basename(res_files.items[0].path);
break :blk basename[0 .. basename.len - fs.path.extension(basename).len];
} else if (show_builtin) {
break :blk "builtin";
} else if (arg_mode == .run) {
@ -2530,6 +2573,21 @@ fn buildOutputType(
link_libcpp = true;
}
if (target_info.target.ofmt == .coff) {
// Now that we know the target supports resources,
// we can add the res files as link objects.
for (res_files.items) |res_file| {
try link_objects.append(res_file);
}
} else {
if (rc_source_files.items.len != 0) {
fatal("rc files are not allowed unless the target object format is coff (Windows/UEFI)", .{});
}
if (res_files.items.len != 0) {
fatal("res files are not allowed unless the target object format is coff (Windows/UEFI)", .{});
}
}
if (target_info.target.cpu.arch.isWasm()) blk: {
if (single_threaded == null) {
single_threaded = true;
@ -2933,6 +2991,7 @@ fn buildOutputType(
if (output_mode == .Obj and (object_format == .coff or object_format == .macho)) {
const total_obj_count = c_source_files.items.len +
@intFromBool(root_src_file != null) +
rc_source_files.items.len +
link_objects.items.len;
if (total_obj_count > 1) {
fatal("{s} does not support linking multiple objects into one", .{@tagName(object_format)});
@ -3319,6 +3378,8 @@ fn buildOutputType(
.rpath_list = rpath_list.items,
.symbol_wrap_set = symbol_wrap_set,
.c_source_files = c_source_files.items,
.rc_source_files = rc_source_files.items,
.rc_includes = rc_includes,
.link_objects = link_objects.items,
.framework_dirs = framework_dirs.items,
.frameworks = resolved_frameworks.items,
@ -6478,3 +6539,8 @@ fn accessFrameworkPath(
return false;
}
/// Maps the value given to `-rcincludes` onto `Compilation.RcIncludes` by
/// matching it against the enum's tag names. Any other value is reported via
/// `fatal`.
fn parseRcIncludes(arg: []const u8) Compilation.RcIncludes {
    if (std.meta.stringToEnum(Compilation.RcIncludes, arg)) |includes| {
        return includes;
    }
    fatal("unsupported rc includes type: '{s}'", .{arg});
}

18
src/resinator.zig Normal file
View File

@ -0,0 +1,18 @@
pub const ani = @import("resinator/ani.zig");
pub const ast = @import("resinator/ast.zig");
pub const bmp = @import("resinator/bmp.zig");
pub const cli = @import("resinator/cli.zig");
pub const code_pages = @import("resinator/code_pages.zig");
pub const comments = @import("resinator/comments.zig");
pub const compile = @import("resinator/compile.zig");
pub const errors = @import("resinator/errors.zig");
pub const ico = @import("resinator/ico.zig");
pub const lang = @import("resinator/lang.zig");
pub const lex = @import("resinator/lex.zig");
pub const literals = @import("resinator/literals.zig");
pub const parse = @import("resinator/parse.zig");
pub const rc = @import("resinator/rc.zig");
pub const res = @import("resinator/res.zig");
pub const source_mapping = @import("resinator/source_mapping.zig");
pub const utils = @import("resinator/utils.zig");
pub const windows1252 = @import("resinator/windows1252.zig");

58
src/resinator/ani.zig Normal file
View File

@ -0,0 +1,58 @@
//! https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
//! https://www.moon-soft.com/program/format/windows/ani.htm
//! https://www.gdgsoft.com/anituner/help/aniformat.htm
//! https://www.lomont.org/software/aniexploit/ExploitANI.pdf
//!
//! RIFF( 'ACON'
//! [LIST( 'INFO' <info_data> )]
//! [<DISP_ck>]
//! anih( <ani_header> )
//! [rate( <rate_info> )]
//! ['seq '( <sequence_info> )]
//! LIST( 'fram' icon( <icon_file> ) ... )
//! )
const std = @import("std");
const AF_ICON: u32 = 1;
/// Returns true when the `anih` header read from `reader` has the `AF_ICON`
/// flag set. Any failure while locating or reading the header (wrong magic,
/// read error, end of stream) yields `false`.
pub fn isAnimatedIcon(reader: anytype) bool {
    if (getAniheaderFlags(reader)) |flags| {
        return (flags & AF_ICON) == AF_ICON;
    } else |_| {
        return false;
    }
}
/// Reads a RIFF/ACON stream from `reader` and returns the `flags` field of its
/// `anih` chunk.
///
/// Returns `error.InvalidFormat` if the stream does not start with `RIFF` or
/// its form type is not `ACON`. Errors from `reader` (including reaching the
/// end of the stream before an `anih` chunk is found) are propagated.
fn getAniheaderFlags(reader: anytype) !u32 {
    const riff_magic = try reader.readBytesNoEof(4);
    if (!std.mem.eql(u8, &riff_magic, "RIFF")) return error.InvalidFormat;

    // The size of the RIFF chunk is not needed; just consume it.
    _ = try reader.readIntLittle(u32);

    const form = try reader.readBytesNoEof(4);
    if (!std.mem.eql(u8, &form, "ACON")) return error.InvalidFormat;

    // Walk the sub-chunks, skipping each payload, until `anih` is reached.
    var chunk_id = try reader.readBytesNoEof(4);
    var chunk_len = try reader.readIntLittle(u32);
    while (!std.mem.eql(u8, &chunk_id, "anih")) {
        // TODO: Move file cursor instead of skipBytes
        try reader.skipBytes(chunk_len, .{});
        chunk_id = try reader.readBytesNoEof(4);
        chunk_len = try reader.readIntLittle(u32);
    }

    // `readStruct` copies the bytes as-is, so convert the little-endian field
    // to host byte order before returning it.
    const header = try reader.readStruct(ANIHEADER);
    return std.mem.littleToNative(u32, header.flags);
}
/// From Microsoft Multimedia Data Standards Update April 15, 1994
///
/// Payload of the `anih` chunk. `getAniheaderFlags` reads it in one piece with
/// `reader.readStruct(ANIHEADER)`, which is why it is declared as an
/// `extern struct` of nine `u32` fields.
/// Within this file only `flags` is inspected; the remaining fields are read
/// but unused here.
const ANIHEADER = extern struct {
cbSizeof: u32,
cFrames: u32,
cSteps: u32,
cx: u32,
cy: u32,
cBitCount: u32,
cPlanes: u32,
jifRate: u32,
/// Bit flags; `isAnimatedIcon` tests this against `AF_ICON`.
flags: u32,
};

1084
src/resinator/ast.zig Normal file

File diff suppressed because it is too large Load Diff

268
src/resinator/bmp.zig Normal file
View File

@ -0,0 +1,268 @@
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
//! https://learn.microsoft.com/en-us/previous-versions//dd183376(v=vs.85)
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfo
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader
//! https://archive.org/details/mac_Graphics_File_Formats_Second_Edition_1996/page/n607/mode/2up
//! https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapv5header
//!
//! Notes:
//! - The Microsoft documentation is incredibly unclear about the color table when the
//! bit depth is >= 16.
//! + For bit depth 24 it says "the bmiColors member of BITMAPINFO is NULL" but also
//! says "the bmiColors color table is used for optimizing colors used on palette-based
//! devices, and must contain the number of entries specified by the bV5ClrUsed member"
//! + For bit depth 16 and 32, it seems to imply that if the compression is BI_BITFIELDS
//! or BI_ALPHABITFIELDS, then the color table *only* consists of the bit masks, but
//! doesn't really say this outright and the Wikipedia article seems to disagree
//! For the purposes of this implementation, color tables can always be present for any
//! bit depth and compression, and the color table follows the header + any optional
//! bit mask fields dictated by the specified compression.
const std = @import("std");
const BitmapHeader = @import("ico.zig").BitmapHeader;
pub const windows_format_id = std.mem.readIntNative(u16, "BM");
pub const file_header_len = 14;
/// Errors that `read` can return while parsing a bitmap's headers.
pub const ReadError = error{
/// The reader failed (e.g. ran out of data) while a header was being read.
UnexpectedEOF,
/// The first two bytes of the file are not "BM".
InvalidFileHeader,
/// The pixel data offset is larger than the `max_size` passed to `read`, or
/// it points before the end of the file header + DIB header.
ImpossiblePixelDataOffset,
/// The DIB header size does not correspond to a known header version.
UnknownBitmapVersion,
/// The bit depth is not one of the values accepted for the header version.
InvalidBitsPerPixel,
/// The header declares more palette colors than the bit depth allows.
TooManyColorsInPalette,
/// The space between the headers and the pixel data is too small to hold the
/// bit masks required by the compression type.
MissingBitfieldMasks,
};
/// Summary of a bitmap's headers as produced by `read`, together with helpers
/// for computing the sizes of the regions that precede the pixel data.
pub const BitmapInfo = struct {
    /// Size in bytes of the DIB header, as stored in the file.
    dib_header_size: u32,

    /// Contains the interpreted number of colors in the palette (e.g.
    /// if the field's value is zero and the bit depth is <= 8, this
    /// will contain the maximum number of colors for the bit depth
    /// rather than the field's value directly).
    colors_in_palette: u32,
    /// Size in bytes of one palette entry.
    bytes_per_color_palette_element: u8,
    /// Offset of the pixel data, as stored in the file header.
    pixel_data_offset: u32,
    compression: Compression,

    /// Byte length the palette should have according to the header fields.
    pub fn getExpectedPaletteByteLen(self: *const BitmapInfo) u64 {
        const color_count: u64 = self.colors_in_palette;
        return color_count * self.bytes_per_color_palette_element;
    }

    /// Byte length actually available for the palette: the gap between the
    /// headers and the pixel data, minus any bit masks.
    pub fn getActualPaletteByteLen(self: *const BitmapInfo) u64 {
        const gap_len = self.getByteLenBetweenHeadersAndPixels();
        return gap_len - self.getBitmasksByteLen();
    }

    /// Number of bytes between the end of the DIB header and the pixel data.
    pub fn getByteLenBetweenHeadersAndPixels(self: *const BitmapInfo) u64 {
        const pixel_offset: u64 = self.pixel_data_offset;
        return pixel_offset - self.dib_header_size - file_header_len;
    }

    /// Byte length of the bit masks implied by the compression type.
    pub fn getBitmasksByteLen(self: *const BitmapInfo) u8 {
        if (self.compression == .BI_BITFIELDS) return 12;
        if (self.compression == .BI_ALPHABITFIELDS) return 16;
        return 0;
    }

    /// How many palette bytes are missing compared to what is expected, or 0
    /// when the available palette space is at least the expected length.
    pub fn getMissingPaletteByteLen(self: *const BitmapInfo) u64 {
        return self.getExpectedPaletteByteLen() -| self.getActualPaletteByteLen();
    }

    /// Returns the full byte len of the DIB header + optional bitmasks + color palette
    pub fn getExpectedByteLenBeforePixelData(self: *const BitmapInfo) u64 {
        const header_len: u64 = self.dib_header_size;
        return header_len + self.getBitmasksByteLen() + self.getExpectedPaletteByteLen();
    }

    /// Returns the full expected byte len
    pub fn getExpectedByteLen(self: *const BitmapInfo, file_size: u64) u64 {
        const before_pixels = self.getExpectedByteLenBeforePixelData();
        return before_pixels + self.getPixelDataLen(file_size);
    }

    /// Number of bytes from the pixel data offset to the end of a file of
    /// `file_size` bytes.
    pub fn getPixelDataLen(self: *const BitmapInfo, file_size: u64) u64 {
        return file_size - self.pixel_data_offset;
    }
};
/// Parses the bitmap file header and the DIB header from `reader` and returns
/// the information needed to reason about the palette and pixel data layout.
///
/// `max_size` is the upper bound for the pixel data offset (presumably the size
/// of the bitmap file; confirm against callers). An offset beyond it, or one
/// that points inside the headers, is rejected with
/// `error.ImpossiblePixelDataOffset`.
///
/// All multi-byte header fields are decoded as little-endian, independent of
/// the host's byte order.
pub fn read(reader: anytype, max_size: u64) ReadError!BitmapInfo {
    var bitmap_info: BitmapInfo = undefined;
    const file_header = reader.readBytesNoEof(file_header_len) catch return error.UnexpectedEOF;

    // `windows_format_id` is built with the same native read, so this
    // comparison is a plain byte-wise check for "BM".
    const id = std.mem.readIntNative(u16, file_header[0..2]);
    if (id != windows_format_id) return error.InvalidFileHeader;

    // The offset is stored little-endian; a native read would produce a
    // byte-swapped value on big-endian hosts.
    bitmap_info.pixel_data_offset = std.mem.readIntLittle(u32, file_header[10..14]);
    if (bitmap_info.pixel_data_offset > max_size) return error.ImpossiblePixelDataOffset;

    bitmap_info.dib_header_size = reader.readIntLittle(u32) catch return error.UnexpectedEOF;
    if (bitmap_info.pixel_data_offset < file_header_len + bitmap_info.dib_header_size) return error.ImpossiblePixelDataOffset;
    const dib_version = BitmapHeader.Version.get(bitmap_info.dib_header_size);
    switch (dib_version) {
        .@"nt3.1", .@"nt4.0", .@"nt5.0" => {
            // The size field has already been consumed, so write it back into
            // the buffer and read only the remainder of the header.
            var dib_header_buf: [@sizeOf(BITMAPINFOHEADER)]u8 align(@alignOf(BITMAPINFOHEADER)) = undefined;
            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
            const dib_header: *BITMAPINFOHEADER = @ptrCast(&dib_header_buf);
            structFieldsLittleToNative(BITMAPINFOHEADER, dib_header);

            bitmap_info.colors_in_palette = try dib_header.numColorsInTable();
            bitmap_info.bytes_per_color_palette_element = 4;
            bitmap_info.compression = @enumFromInt(dib_header.biCompression);

            if (bitmap_info.getByteLenBetweenHeadersAndPixels() < bitmap_info.getBitmasksByteLen()) {
                return error.MissingBitfieldMasks;
            }
        },
        .@"win2.0" => {
            var dib_header_buf: [@sizeOf(BITMAPCOREHEADER)]u8 align(@alignOf(BITMAPCOREHEADER)) = undefined;
            std.mem.writeIntLittle(u32, dib_header_buf[0..4], bitmap_info.dib_header_size);
            reader.readNoEof(dib_header_buf[4..]) catch return error.UnexpectedEOF;
            const dib_header: *BITMAPCOREHEADER = @ptrCast(&dib_header_buf);
            structFieldsLittleToNative(BITMAPCOREHEADER, dib_header);

            // > The size of the color palette is calculated from the BitsPerPixel value.
            // > The color palette has 2, 16, 256, or 0 entries for a BitsPerPixel of
            // > 1, 4, 8, and 24, respectively.
            bitmap_info.colors_in_palette = switch (dib_header.bcBitCount) {
                inline 1, 4, 8 => |bit_count| 1 << bit_count,
                24 => 0,
                else => return error.InvalidBitsPerPixel,
            };
            bitmap_info.bytes_per_color_palette_element = 3;

            bitmap_info.compression = .BI_RGB;
        },
        .unknown => return error.UnknownBitmapVersion,
    }

    return bitmap_info;
}
/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapcoreheader
///
/// DIB header parsed by `read` when the header version is `win2.0`.
/// `read` fills a byte buffer of this struct's size and converts every field
/// from little-endian to native byte order before use.
pub const BITMAPCOREHEADER = extern struct {
/// Size of this header in bytes; `read` consumes it first to pick the header version.
bcSize: u32,
bcWidth: u16,
bcHeight: u16,
bcPlanes: u16,
/// Bits per pixel. `read` accepts only 1, 4, 8 and 24 for this header version.
bcBitCount: u16,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
pub const BITMAPINFOHEADER = extern struct {
    bcSize: u32,
    biWidth: i32,
    biHeight: i32,
    biPlanes: u16,
    biBitCount: u16,
    biCompression: u32,
    biSizeImage: u32,
    biXPelsPerMeter: i32,
    biYPelsPerMeter: i32,
    biClrUsed: u32,
    biClrImportant: u32,

    /// Returns the number of entries the color table is taken to have.
    ///
    /// Returns error.TooManyColorsInPalette if the number of colors specified
    /// exceeds the number of possible colors referenced in the pixel data (i.e.
    /// if 1 bit is used per pixel, then the color table can't have more than 2 colors
    /// since any more couldn't possibly be indexed in the pixel data)
    ///
    /// Returns error.InvalidBitsPerPixel if the bit depth is not 1, 4, 8, 16, 24, or 32.
    pub fn numColorsInTable(self: BITMAPINFOHEADER) !u32 {
        switch (self.biBitCount) {
            inline 1, 4, 8, 16, 24, 32 => |bit_count| {
                const max_colors = 1 << bit_count;

                // > If biClrUsed is zero, the array contains the maximum number of
                // > colors for the given bitdepth; that is, 2^biBitCount colors
                //
                // This default only applies to the palette-indexed bit depths;
                // for 16 bits and up a zero means the table has zero entries.
                if (bit_count < 16) {
                    if (self.biClrUsed == 0) return max_colors;
                }

                // > If biClrUsed is nonzero and the biBitCount member is less than 16,
                // > the biClrUsed member specifies the actual number of colors the
                // > graphics engine or device driver accesses.
                //
                // > If biBitCount is 16 or greater, the biClrUsed member specifies
                // > the size of the color table used to optimize performance of the
                // > system color palettes.
                //
                // Note: Bit depths >= 16 only use the color table 'for optimizing colors
                // used on palette-based devices', but it still makes sense to limit their
                // colors since the pixel data is still limited to this number of colors
                // (i.e. even though the color table is not indexed by the pixel data,
                // the color table having more colors than the pixel data can represent
                // would never make sense and indicates a malformed bitmap).
                if (self.biClrUsed > max_colors) return error.TooManyColorsInPalette;
                return self.biClrUsed;
            },
            else => return error.InvalidBitsPerPixel,
        }
    }
};
/// Compression type of a bitmap. `read` produces it from the `biCompression`
/// header field via `@enumFromInt`, so the enum is non-exhaustive (`_`) to
/// allow values that are not listed here.
pub const Compression = enum(u32) {
BI_RGB = 0,
BI_RLE8 = 1,
BI_RLE4 = 2,
// `BitmapInfo.getBitmasksByteLen` reports 12 bytes of bit masks for this type.
BI_BITFIELDS = 3,
BI_JPEG = 4,
BI_PNG = 5,
// `BitmapInfo.getBitmasksByteLen` reports 16 bytes of bit masks for this type.
BI_ALPHABITFIELDS = 6,
BI_CMYK = 11,
BI_CMYKRLE8 = 12,
BI_CMYKRLE4 = 13,
_,
};
/// Converts every field of `x` in place from little-endian to the host's byte
/// order. `T` must be a struct whose fields are all integers.
fn structFieldsLittleToNative(comptime T: type, x: *T) void {
    const fields = @typeInfo(T).Struct.fields;
    inline for (fields) |field| {
        const field_ptr = &@field(x, field.name);
        field_ptr.* = std.mem.littleToNative(field.type, field_ptr.*);
    }
}
test "read" {
// A small bitmap: 14-byte file header ("BM", pixel data offset 0x36 = 54)
// followed by a DIB header whose size field is 0x28 = 40 and whose bit count is 16.
var bmp_data = "BM<\x00\x00\x00\x00\x00\x00\x006\x00\x00\x00(\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x00\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00\x12\x0b\x00\x00\x12\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x7f\x00\x00\x00\x00".*;
var fbs = std.io.fixedBufferStream(&bmp_data);
// Well-formed input: the header size is reported back unchanged.
{
const bitmap = try read(fbs.reader(), bmp_data.len);
try std.testing.expectEqual(@as(u32, BitmapHeader.Version.@"nt3.1".len()), bitmap.dib_header_size);
}
// A DIB header size of 11 is expected to be rejected as an unknown version.
{
fbs.reset();
bmp_data[file_header_len] = 11;
try std.testing.expectError(error.UnknownBitmapVersion, read(fbs.reader(), bmp_data.len));
// restore
bmp_data[file_header_len] = BitmapHeader.Version.@"nt3.1".len();
}
// The "BM" identifier is compared byte-for-byte, so a lowercase 'b' is rejected.
{
fbs.reset();
bmp_data[0] = 'b';
try std.testing.expectError(error.InvalidFileHeader, read(fbs.reader(), bmp_data.len));
// restore
bmp_data[0] = 'B';
}
// Input that ends one byte before the end of the DIB header.
{
const cutoff_len = file_header_len + BitmapHeader.Version.@"nt3.1".len() - 1;
var dib_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]);
try std.testing.expectError(error.UnexpectedEOF, read(dib_cutoff_fbs.reader(), bmp_data.len));
}
// Input that ends one byte before the end of the file header.
{
const cutoff_len = file_header_len - 1;
var bmp_cutoff_fbs = std.io.fixedBufferStream(bmp_data[0..cutoff_len]);
try std.testing.expectError(error.UnexpectedEOF, read(bmp_cutoff_fbs.reader(), bmp_data.len));
}
}

1433
src/resinator/cli.zig Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,487 @@
const std = @import("std");
const windows1252 = @import("windows1252.zig");
// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing
// than it is to the stuff in this file.
//
// representations for context:
// Win-1252 89
// UTF-8 E2 80 B0
// UTF-16 20 30
//
// With code page 65001:
// ‰ RCDATA { "‰" L"‰" }
// File encoded as Windows-1252:
// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16
// "‰" => 0x3F ('?')
// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16
// File encoded as UTF-8:
// ‰ => <U+2030 ‰> as u16
// "‰" => 0x89 ('‰' encoded as Windows-1252)
// L"‰" => <U+2030 ‰> as u16
//
// With code page 1252:
// ‰ RCDATA { "‰" L"‰" }
// File encoded as Windows-1252:
// ‰ => <U+2030 ‰> as u16
// "‰" => 0x89 ('‰' encoded as Windows-1252)
// L"‰" => <U+2030 ‰> as u16
// File encoded as UTF-8:
// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16
// ^ first byte of utf8 representation
// ^ second byte of UTF-8 representation (0x80), but interpreted as
// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>)
// ^ third byte of utf8 representation
// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation)
// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation)
//
// With code page 1252:
// <0x90> RCDATA { "<0x90>" L"<0x90>" }
// File encoded as Windows-1252:
// <0x90> => 0x90 as u16
// "<0x90>" => 0x90
// L"<0x90>" => 0x90 as u16
// File encoded as UTF-8:
// <0x90> => 0xC2 as u16, 0x90 as u16
// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>)
// L"<0x90>" => 0xC2 as u16, 0x90 as u16
//
// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>):
// "Âa" L"Âa" "\xC2ad" L"\xC2AD"
// With code page 1252:
// C2 61 C2 00 61 00 C2 61 64 AD C2
// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD
// \xC2~`
// With code page 65001:
// 3F 61 FD FF 61 00 C2 61 64 AD C2
// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD
// `. `. `~\xC2
// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it).
// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the
// `. invalid sequence so only the <0xC2> gets converted to <U+FFFD>.
// `~Same as ^ but converted to '?' instead.
//
// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>):
// "ð€a" L"ð€a"
// With code page 1252:
// F0 80 61 F0 00 AC 20 61 00
// ð^ €^ a^ ð~~~^ €~~~^ a~~~^
// With code page 65001:
// 3F 61 FD FF 61 00
// ^. a^ ^~~~. a~~~^
// `. `.
// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so
// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>'
// `~Same as ^ but converted to '?' instead.
/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
pub const CodePage = enum(u16) {
// supported
windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows)
utf8 = 65001, // utf-8 Unicode (UTF-8)
// unsupported but valid
ibm037 = 37, // IBM037 IBM EBCDIC US-Canada
ibm437 = 437, // IBM437 OEM United States
ibm500 = 500, // IBM500 IBM EBCDIC International
asmo708 = 708, // ASMO-708 Arabic (ASMO 708)
asmo449plus = 709, // Arabic (ASMO-449+, BCON V4)
transparent_arabic = 710, // Arabic - Transparent Arabic
dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS)
ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS)
ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS)
ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS)
ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian)
ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS)
ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol
ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS)
ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS)
dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS)
ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS)
ibm864 = 864, // IBM864 OEM Arabic; Arabic (864)
ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS)
cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS)
ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS)
ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
windows874 = 874, // windows-874 Thai (Windows)
cp875 = 875, // cp875 IBM EBCDIC Greek Modern
shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS)
gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code)
big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5)
ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System
ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications
windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows)
windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows)
windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows)
windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows)
windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows)
windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows)
windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows)
windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows)
johab = 1361, // Johab Korean (Johab)
macintosh = 10000, // macintosh MAC Roman; Western European (Mac)
x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac)
x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
x_mac_korean = 10003, // x-mac-korean Korean (Mac)
x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac)
x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac)
x_mac_greek = 10006, // x-mac-greek Greek (Mac)
x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac)
x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac)
x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac)
x_mac_thai = 10021, // x-mac-thai Thai (Mac)
x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac)
x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac)
x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac)
x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac)
utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
x_cp20001 = 20001, // x-cp20001 TCA Taiwan
x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan
x_cp20004 = 20004, // x-cp20004 TeleText Taiwan
x_cp20005 = 20005, // x-cp20005 Wang Taiwan
x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit)
x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit)
x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit)
us_ascii = 20127, // us-ascii US-ASCII (7-bit)
x_cp20261 = 20261, // x-cp20261 T.61
x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent
ibm273 = 20273, // IBM273 IBM EBCDIC Germany
ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway
ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden
ibm280 = 20280, // IBM280 IBM EBCDIC Italy
ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain
ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom
ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended
ibm297 = 20297, // IBM297 IBM EBCDIC France
ibm420 = 20420, // IBM420 IBM EBCDIC Arabic
ibm423 = 20423, // IBM423 IBM EBCDIC Greek
ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew
x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai
koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic
ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian
ibm905 = 20905, // IBM905 IBM EBCDIC Turkish
ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
x_cp20949 = 20949, // x-cp20949 Korean Wansung
cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
// = 21027, // (deprecated)
koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3
iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic
iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic
iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic
iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek
iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish
iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian
iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9
x_europa = 29001, // x-Europa Europa 3
is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean
x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese
ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended
ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese
ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean
ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese
ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese
ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese
ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese
euc_jp = 51932, // euc-jp EUC Japanese
euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
euc_kr = 51949, // euc-kr EUC Korean
euc_chinesetrad = 51950, // EUC Traditional Chinese
hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
x_iscii_de = 57002, // x-iscii-de ISCII Devanagari
x_iscii_be = 57003, // x-iscii-be ISCII Bangla
x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil
x_iscii_te = 57005, // x-iscii-te ISCII Telugu
x_iscii_as = 57006, // x-iscii-as ISCII Assamese
x_iscii_or = 57007, // x-iscii-or ISCII Odia
x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada
x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam
x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati
x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi
utf7 = 65000, // utf-7 Unicode (UTF-7)
/// Decodes the codepoint that starts at `bytes[index]`, interpreting `bytes`
/// according to `code_page`.
///
/// Returns `null` when `index` is at or past the end of `bytes`.
/// Only `.windows1252` and `.utf8` are handled; every other code page hits
/// `unreachable`, so callers must only pass code pages for which
/// `isSupported` returns true.
pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint {
    if (index >= bytes.len) return null;
    return switch (code_page) {
        // Every byte value has a representation, so this is always a
        // single-byte conversion.
        .windows1252 => Codepoint{
            .value = windows1252.toCodepoint(bytes[index]),
            .byte_len = 1,
        },
        // Ill-formed sequences are reported via `Codepoint.invalid` by the decoder.
        .utf8 => Utf8.WellFormedDecoder.decode(bytes[index..]),
        else => unreachable,
    };
}
/// Returns true for the code pages that `codepointAt` is able to decode
/// (Windows-1252 and UTF-8); all other members are valid identifiers only.
pub fn isSupported(code_page: CodePage) bool {
    return code_page == .windows1252 or code_page == .utf8;
}
/// Maps a numeric code page identifier to its `CodePage` member.
/// Returns `error.InvalidCodePage` when no member has the value `identifier`.
pub fn getByIdentifier(identifier: u16) !CodePage {
    // The loop is unrolled at comptime into one comparison per enum member.
    // A lookup table would likely be faster, but this is not expected to be
    // called often enough for that to matter.
    inline for (@typeInfo(CodePage).Enum.fields) |enum_field| {
        if (enum_field.value == identifier) return @field(CodePage, enum_field.name);
    }
    return error.InvalidCodePage;
}
/// Like `getByIdentifier`, but additionally rejects code pages that are valid
/// yet not supported (see `isSupported`) with `error.UnsupportedCodePage`.
pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage {
    const code_page = try getByIdentifier(identifier);
    if (!code_page.isSupported()) return error.UnsupportedCodePage;
    return code_page;
}
};
pub const Utf8 = struct {
    /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section
    /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
    pub const WellFormedDecoder = struct {
        /// Like std.unicode.utf8ByteSequenceLength, but:
        /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
        /// - Returns an optional value instead of an error union
        pub fn sequenceLength(first_byte: u8) ?u3 {
            if (first_byte < 0x80) return 1;
            // 0x80...0xBF are continuation bytes, 0xC0 and 0xC1 are never well-formed.
            if (first_byte < 0xC2) return null;
            if (first_byte < 0xE0) return 2;
            if (first_byte < 0xF0) return 3;
            if (first_byte < 0xF5) return 4;
            // 0xF5...0xFF
            return null;
        }

        /// True for bytes of the form 0b10xxxxxx (0x80...0xBF).
        fn isContinuationByte(byte: u8) bool {
            return (byte & 0b11000000) == 0b10000000;
        }

        /// Checks the byte at position `byte_index` (>= 1) of a sequence that starts
        /// with `first_byte` against Table 3-7 of D92 in Chapter 3 of the Unicode
        /// Standard. Only the second byte has ranges that depend on the first byte.
        fn isValidTrailingByte(first_byte: u8, byte_index: usize, byte: u8) bool {
            if (byte_index != 1) return isContinuationByte(byte);
            return switch (first_byte) {
                0xE0 => byte >= 0xA0 and byte <= 0xBF,
                0xED => byte >= 0x80 and byte <= 0x9F,
                0xF0 => byte >= 0x90 and byte <= 0xBF,
                0xF4 => byte >= 0x80 and byte <= 0x8F,
                else => isContinuationByte(byte),
            };
        }

        /// Decodes the sequence at the start of `bytes`, which must not be empty.
        ///
        /// On success, returns the codepoint and the number of bytes it occupies.
        /// For an ill-formed or truncated sequence, `value` is `Codepoint.invalid`
        /// and `byte_len` is the number of bytes that make up the invalid sequence.
        pub fn decode(bytes: []const u8) Codepoint {
            std.debug.assert(bytes.len > 0);
            const lead = bytes[0];
            const expected_len = sequenceLength(lead) orelse {
                return .{ .value = Codepoint.invalid, .byte_len = 1 };
            };
            if (expected_len == 1) return .{ .value = lead, .byte_len = 1 };

            // Payload bits of the lead byte: 110xxxxx, 1110xxxx or 11110xxx.
            const lead_mask: u8 = switch (expected_len) {
                2 => 0b00011111,
                3 => 0b00001111,
                else => 0b00000111,
            };
            var value: u21 = lead & lead_mask;

            // The input may end before the sequence is complete.
            const available = @min(bytes.len, expected_len);
            for (bytes[1..available], 1..) |byte, byte_index| {
                if (!isValidTrailingByte(lead, byte_index, byte)) {
                    // Only include the byte in the invalid sequence if it's in the range
                    // of a continuation byte. All other values should not be included in the
                    // invalid sequence.
                    //
                    // Note: This is how the Windows RC compiler handles this, this may not
                    // be the correct-as-according-to-the-Unicode-standard way to do it.
                    const invalid_len = if (isContinuationByte(byte)) byte_index + 1 else byte_index;
                    return .{ .value = Codepoint.invalid, .byte_len = invalid_len };
                }
                value = (value << 6) | (byte & 0b00111111);
            }

            // Truncated sequence: everything that was available is invalid.
            if (available != expected_len) {
                return .{ .value = Codepoint.invalid, .byte_len = available };
            }
            return .{ .value = value, .byte_len = expected_len };
        }
    };
};
test "Utf8.WellFormedDecoder" {
    // <0x80> is not an allowed second byte after <0xF0>, but it is in the continuation
    // byte range, so both bytes are reported as one invalid sequence.
    const decoded = Utf8.WellFormedDecoder.decode("\xF0\x80");
    try std.testing.expectEqual(Codepoint.invalid, decoded.value);
    try std.testing.expectEqual(@as(usize, 2), decoded.byte_len);
}
test "codepointAt invalid utf8" {
    const expect = struct {
        /// Decoding `bytes` as UTF-8 at `index` must yield an invalid sequence of `byte_len` bytes.
        fn invalidAt(bytes: []const u8, index: usize, byte_len: usize) !void {
            try std.testing.expectEqual(
                Codepoint{ .value = Codepoint.invalid, .byte_len = byte_len },
                CodePage.utf8.codepointAt(index, bytes).?,
            );
        }

        /// `index` must be reported as past the end of `bytes`.
        fn endAt(bytes: []const u8, index: usize) !void {
            try std.testing.expectEqual(
                @as(?Codepoint, null),
                CodePage.windows1252.codepointAt(index, bytes),
            );
        }
    };

    {
        const invalid_utf8 = "\xf0\xf0\x80\x80\x80";
        try expect.invalidAt(invalid_utf8, 0, 1);
        try expect.invalidAt(invalid_utf8, 1, 2);
        try expect.invalidAt(invalid_utf8, 3, 1);
        try expect.invalidAt(invalid_utf8, 4, 1);
        try expect.endAt(invalid_utf8, 5);
    }
    {
        const invalid_utf8 = "\xE1\xA0\xC0";
        try expect.invalidAt(invalid_utf8, 0, 2);
        try expect.invalidAt(invalid_utf8, 2, 1);
        try expect.endAt(invalid_utf8, 3);
    }
    {
        const invalid_utf8 = "\xD2";
        try expect.invalidAt(invalid_utf8, 0, 1);
        try expect.endAt(invalid_utf8, 1);
    }
    {
        const invalid_utf8 = "\xE1\xA0";
        try expect.invalidAt(invalid_utf8, 0, 2);
        try expect.endAt(invalid_utf8, 2);
    }
    {
        const invalid_utf8 = "\xC5\xFF";
        try expect.invalidAt(invalid_utf8, 0, 1);
        try expect.invalidAt(invalid_utf8, 1, 1);
        try expect.endAt(invalid_utf8, 2);
    }
}
test "codepointAt utf8 encoded" {
    // The same two bytes are decoded once per code page.
    const utf8_encoded = "²";
    const as_utf8 = CodePage.utf8;
    const as_windows1252 = CodePage.windows1252;

    // with code page utf8: one codepoint spanning both bytes
    try std.testing.expectEqual(
        Codepoint{ .value = '²', .byte_len = 2 },
        as_utf8.codepointAt(0, utf8_encoded).?,
    );
    try std.testing.expectEqual(@as(?Codepoint, null), as_utf8.codepointAt(2, utf8_encoded));

    // with code page windows1252: each byte is its own codepoint
    try std.testing.expectEqual(
        Codepoint{ .value = '\xC2', .byte_len = 1 },
        as_windows1252.codepointAt(0, utf8_encoded).?,
    );
    try std.testing.expectEqual(
        Codepoint{ .value = '\xB2', .byte_len = 1 },
        as_windows1252.codepointAt(1, utf8_encoded).?,
    );
    try std.testing.expectEqual(@as(?Codepoint, null), as_windows1252.codepointAt(2, utf8_encoded));
}
test "codepointAt windows1252 encoded" {
    // A single byte that is a complete Windows-1252 character but, on its own,
    // only a UTF-8 continuation byte.
    const windows1252_encoded = "\xB2";

    // with code page utf8
    try std.testing.expectEqual(Codepoint{
        .value = Codepoint.invalid,
        .byte_len = 1,
    }, CodePage.utf8.codepointAt(0, windows1252_encoded).?);
    // Index 1 equals the input length, i.e. it is the first out-of-bounds index.
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(1, windows1252_encoded));

    // with code page windows1252
    try std.testing.expectEqual(Codepoint{
        .value = '\xB2',
        .byte_len = 1,
    }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?);
    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded));
}
/// A decoded codepoint together with the number of source bytes it was decoded from.
pub const Codepoint = struct {
    /// Placed in `value` when the decoded bytes do not form a valid codepoint.
    /// It is the largest `u21`, which is above the highest Unicode codepoint.
    pub const invalid: u21 = std.math.maxInt(u21);

    /// The codepoint, or `invalid`.
    value: u21,
    /// How many bytes of the input this codepoint (or invalid sequence) spans.
    byte_len: usize,
};

340
src/resinator/comments.zig Normal file
View File

@ -0,0 +1,340 @@
//! Expects to run after a C preprocessor step that preserves comments.
//!
//! `rc` has a peculiar quirk where something like `blah/**/blah` will be
//! transformed into `blahblah` during parsing. However, `clang -E` will
//! transform it into `blah blah`, so in order to match `rc`, we need
//! to remove comments ourselves after the preprocessor runs.
//! Note: Multiline comments that actually span more than one line do
//! get translated to a space character by `rc`.
//!
//! Removing comments before lexing also allows the lexer to not have to
//! deal with comments which would complicate its implementation (this is something
//! of a tradeoff, as removing comments in a separate pass means that we'll
//! need to iterate the source twice instead of once, but having to deal with
//! comments when lexing would be a pain).
const std = @import("std");
const Allocator = std.mem.Allocator;
const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
const SourceMappings = @import("source_mapping.zig").SourceMappings;
const LineHandler = @import("lex.zig").LineHandler;
const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;
/// Copies `source` into `buf` with `//` line comments and `/* */` block comments
/// removed, and returns the written portion of `buf`.
///
/// Comment markers inside single- or double-quoted strings are copied through unchanged.
/// Line endings that terminate a line comment or occur inside a block comment are kept
/// in the output, except that a bare `\r` inside a block comment is dropped (see
/// `handleMultilineCarriageReturn`). `source_mappings` is only used for that case.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
std.debug.assert(buf.len >= source.len);
var result = UncheckedSliceWriter{ .slice = buf };
const State = enum {
// Outside of any comment or string.
start,
// Just saw a `/`; it is held back (not yet written) until the next byte
// decides whether it starts a comment.
forward_slash,
line_comment,
multiline_comment,
// Inside a block comment, just saw a `*` that may be the start of `*/`.
multiline_comment_end,
single_quoted,
// Inside a quoted string, just saw a backslash.
single_quoted_escape,
double_quoted,
double_quoted_escape,
};
var state: State = .start;
var index: usize = 0;
// Index of the held-back `/` while in `.forward_slash`.
var pending_start: ?usize = null;
var line_handler = LineHandler{ .buffer = source };
while (index < source.len) : (index += 1) {
const c = source[index];
// TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely
// cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed
// in the lexer, but comments are stripped before getting to the lexer.
switch (state) {
.start => switch (c) {
'/' => {
state = .forward_slash;
pending_start = index;
},
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
},
else => {
switch (c) {
'"' => state = .double_quoted,
'\'' => state = .single_quoted,
else => {},
}
result.write(c);
},
},
.forward_slash => switch (c) {
'/' => state = .line_comment,
'*' => {
state = .multiline_comment;
},
else => {
// Not a comment after all: write the held-back `/` together with the
// current byte and go back to normal processing.
_ = line_handler.maybeIncrementLineNumber(index);
result.writeSlice(source[pending_start.? .. index + 1]);
pending_start = null;
state = .start;
},
},
.line_comment => switch (c) {
// The line ending is not part of the comment, so it is kept.
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
state = .start;
},
else => {},
},
.multiline_comment => switch (c) {
'\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
// Newlines inside a block comment are kept in the output.
'\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
},
'*' => state = .multiline_comment_end,
else => {},
},
.multiline_comment_end => switch (c) {
'\r' => {
handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
// We only want to treat this as a newline if it's part of a CRLF pair. If it's
// not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
// functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
if (formsLineEndingPair(source, '\r', index + 1)) {
state = .multiline_comment;
}
},
'\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
state = .multiline_comment;
},
// `*/`: the comment is over; neither byte is written.
'/' => {
state = .start;
},
else => {
state = .multiline_comment;
},
},
.single_quoted => switch (c) {
// A line ending always terminates the string state.
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
state = .start;
result.write(c);
},
'\\' => {
state = .single_quoted_escape;
result.write(c);
},
'\'' => {
state = .start;
result.write(c);
},
else => {
result.write(c);
},
},
.single_quoted_escape => switch (c) {
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
state = .start;
result.write(c);
},
// The escaped byte is copied as-is, so an escaped quote does not end the string.
else => {
state = .single_quoted;
result.write(c);
},
},
.double_quoted => switch (c) {
// A line ending always terminates the string state.
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
state = .start;
result.write(c);
},
'\\' => {
state = .double_quoted_escape;
result.write(c);
},
'"' => {
state = .start;
result.write(c);
},
else => {
result.write(c);
},
},
.double_quoted_escape => switch (c) {
'\r', '\n' => {
_ = line_handler.incrementLineNumber(index);
state = .start;
result.write(c);
},
// The escaped byte is copied as-is, so an escaped quote does not end the string.
else => {
state = .double_quoted;
result.write(c);
},
},
}
}
return result.getWritten();
}
/// Handles a `\r` found at `source[index]` inside a multiline comment.
///
/// A bare `\r` within a multiline comment is *not* a line ending for the purposes of
/// removing comments, but it *is* a line ending for the purposes of line
/// counting/source mapping. So the `\r` is only written to `result` when it is part of
/// a CRLF pair; otherwise the line it would have started is collapsed into the
/// previous one in `source_mappings` (if provided).
inline fn handleMultilineCarriageReturn(
    source: []const u8,
    line_handler: *LineHandler,
    index: usize,
    result: *UncheckedSliceWriter,
    source_mappings: ?*SourceMappings,
) void {
    _ = line_handler.incrementLineNumber(index);

    if (formsLineEndingPair(source, '\r', index + 1)) {
        // Part of a CRLF pair: this one is kept.
        result.write('\r');
        return;
    }

    // Bare \r: nothing is written. Because the line gets collapsed, the line number is
    // decremented again so that the next collapse acts on the first of the collapsed
    // line numbers.
    line_handler.line_number -= 1;
    const mappings = source_mappings orelse return;
    // Collapsing keeps it possible to tell which line came from where.
    mappings.collapse(line_handler.line_number, 1);
}
/// Allocating variant of `removeComments`.
///
/// Allocates a buffer as long as `source`, strips the comments into it and then
/// resizes the allocation to the number of bytes actually written.
/// The caller owns the returned slice and must free it with `allocator`.
pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    const written_len = removeComments(source, buf, source_mappings).len;
    return allocator.realloc(buf, written_len);
}
/// Test helper: strips the comments from `source` (without source mappings) and
/// expects the result to equal `expected`.
fn testRemoveComments(expected: []const u8, source: []const u8) !void {
    const allocator = std.testing.allocator;
    const stripped = try removeCommentsAlloc(allocator, source, null);
    defer allocator.free(stripped);
    try std.testing.expectEqualStrings(expected, stripped);
}
test "basic" {
    // Input consisting of nothing but a comment: { expected, source }
    const cases = [_][2][]const u8{
        .{ "", "// comment" },
        .{ "", "/* comment */" },
    };
    for (cases) |case| {
        try testRemoveComments(case[0], case[1]);
    }
}
test "mixed" {
    // Comments next to (or in the middle of) other text: { expected, source }
    const cases = [_][2][]const u8{
        .{ "hello", "hello// comment" },
        .{ "hello", "hel/* comment */lo" },
    };
    for (cases) |case| {
        try testRemoveComments(case[0], case[1]);
    }
}
test "within a string" {
// Comment markers inside a double-quoted string must be left alone, including
// those that follow an escaped quote. Expected output equals the input.
// escaped " is \"
try testRemoveComments(
\\blah"//som\"/*ething*/"BLAH
,
\\blah"//som\"/*ething*/"BLAH
);
}
test "line comments retain newlines" {
// Three comment-only lines collapse to three empty lines (i.e. two newlines).
try testRemoveComments(
\\
\\
\\
,
\\// comment
\\// comment
\\// comment
);
// A CRLF line ending after a line comment is kept intact.
try testRemoveComments("\r\n", "//comment\r\n");
}
test "crazy" {
// An escaped quote does not end the string, so only the comment after the
// real closing quote is removed.
try testRemoveComments(
\\blah"/*som*/\""BLAH
,
\\blah"/*som*/\""/*ething*/BLAH
);
// A newline ends the unterminated string on the first line, so the line comments
// that follow are removed (their newlines are kept).
try testRemoveComments(
\\blah"/*som*/"BLAH RCDATA "BEGIN END
\\
\\
\\hello
\\"
,
\\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END
\\// comment
\\//"blah blah" RCDATA {}
\\hello
\\"
);
}
test "multiline comment with newlines" {
// bare \r is not treated as a newline
try testRemoveComments("blahblah", "blah/*some\rthing*/blah");
// A \n inside the comment is kept in the output.
try testRemoveComments(
\\blah
\\blah
,
\\blah/*some
\\thing*/blah
);
// So is a full CRLF pair.
try testRemoveComments(
"blah\r\nblah",
"blah/*some\r\nthing*/blah",
);
// handle *<not /> correctly
try testRemoveComments(
\\blah
\\
\\
,
\\blah/*some
\\thing*
\\/bl*ah*/
);
}
test "comments appended to a line" {
    // The space between `blah` and the line comment is not part of the comment, so it
    // must remain in the output. The expected values are written as escaped strings so
    // that this trailing space is visible and cannot be lost to whitespace trimming.
    try testRemoveComments(
        "blah \nblah",
        "blah // line comment\nblah",
    );
    try testRemoveComments(
        "blah \r\nblah",
        "blah // line comment\r\nblah",
    );
}
test "remove comments with mappings" {
    const allocator = std.testing.allocator;
    // Both \r are bare and inside (or at the end of) the multiline comment.
    var mut_source = "blah/*\rcommented line*\r/blah".*;

    // One mapping per source line, each mapping line N of the input to line N of test.rc.
    var mappings = SourceMappings{};
    _ = try mappings.files.put(allocator, "test.rc");
    inline for (.{ 1, 2, 3 }) |line_number| {
        try mappings.set(allocator, line_number, .{
            .start_line = line_number,
            .end_line = line_number,
            .filename_offset = 0,
        });
    }
    defer mappings.deinit(allocator);

    const stripped = removeComments(&mut_source, &mut_source, &mappings);
    try std.testing.expectEqualStrings("blahblah", stripped);

    // The three lines have been collapsed into a single mapping ending at line 3.
    try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len);
    try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line);
}
test "in place" {
    // `source` and `buf` are the same memory.
    var mut_source = "blah /* comment */ blah".*;
    const result = removeComments(&mut_source, &mut_source, null);
    // Only the comment itself is removed; the space before it and the space after it
    // are both kept, leaving two consecutive spaces.
    try std.testing.expectEqualStrings("blah  blah", result);
}

3356
src/resinator/compile.zig Normal file

File diff suppressed because it is too large Load Diff

1033
src/resinator/errors.zig Normal file

File diff suppressed because it is too large Load Diff

310
src/resinator/ico.zig Normal file
View File

@ -0,0 +1,310 @@
//! https://devblogs.microsoft.com/oldnewthing/20120720-00/?p=7083
//! https://learn.microsoft.com/en-us/previous-versions/ms997538(v=msdn.10)
//! https://learn.microsoft.com/en-us/windows/win32/menurc/newheader
//! https://learn.microsoft.com/en-us/windows/win32/menurc/resdir
//! https://learn.microsoft.com/en-us/windows/win32/menurc/localheader
const std = @import("std");
/// The errors `read` can return. Errors specific to the reader implementation are
/// reported as the generic `error.ReadError`.
pub const ReadError = std.mem.Allocator.Error || error{ InvalidHeader, InvalidImageType, ImpossibleDataSize, UnexpectedEOF, ReadError };

/// Reads the directory of an .ico/.cur file from `reader` (see `readAnyError`) and
/// narrows the resulting errors to `ReadError`.
///
/// `error.EndOfStream` is reported as `error.UnexpectedEOF`; any error that originates
/// from the reader implementation is reported as `error.ReadError`.
/// `max_size` is the upper bound used to validate each entry's data offset + size.
/// The returned `IconDir` owns its entries; release it with `IconDir.deinit`.
pub fn read(allocator: std.mem.Allocator, reader: anytype, max_size: u64) ReadError!IconDir {
    // Some Reader implementations have an empty ReadError error set which would
    // cause 'unreachable else' if we tried to use an else in the switch, so we
    // need to detect this case and not try to translate to ReadError.
    //
    // Note that a `null` error set means `anyerror`, which is the opposite of empty:
    // such a reader can fail with anything, so its errors must be translated.
    const ReaderError = @TypeOf(reader).Error;
    const empty_reader_errorset = @typeInfo(ReaderError).ErrorSet != null and @typeInfo(ReaderError).ErrorSet.?.len == 0;
    if (empty_reader_errorset) {
        return readAnyError(allocator, reader, max_size) catch |err| switch (err) {
            error.EndOfStream => error.UnexpectedEOF,
            else => |e| return e,
        };
    } else {
        return readAnyError(allocator, reader, max_size) catch |err| switch (err) {
            error.OutOfMemory,
            error.InvalidHeader,
            error.InvalidImageType,
            error.ImpossibleDataSize,
            => |e| return e,
            error.EndOfStream => error.UnexpectedEOF,
            // The remaining errors are dependent on the `reader`, so
            // we just translate them all to generic ReadError
            else => error.ReadError,
        };
    }
}
// TODO: This seems like a somewhat strange pattern, could be a better way
// to do this. Maybe it makes more sense to handle the translation
// at the call site instead of having a helper function here.

/// Reads the header and all directory entries of an .ico/.cur file from `reader`,
/// returning whatever error the reader produces unmodified.
///
/// Fails with `error.InvalidHeader` if the reserved field is not zero, with
/// `error.InvalidImageType` if the type is neither icon nor cursor, and with
/// `error.ImpossibleDataSize` if an entry's data does not fit within `max_size` or is
/// smaller than 16 bytes. The returned `IconDir` owns its entries.
pub fn readAnyError(allocator: std.mem.Allocator, reader: anytype, max_size: u64) !IconDir {
    if ((try reader.readIntLittle(u16)) != 0) {
        return error.InvalidHeader;
    }

    const image_type = reader.readEnum(ImageType, .Little) catch |err| switch (err) {
        error.InvalidValue => return error.InvalidImageType,
        else => |e| return e,
    };

    const num_images = try reader.readIntLittle(u16);

    // To avoid over-allocation in the case of a file that says it has way more
    // entries than it actually does, we use an ArrayList with a conservatively
    // limited initial capacity instead of allocating the entire slice at once.
    var entries = try std.ArrayList(Entry).initCapacity(allocator, @min(num_images, 8));
    errdefer entries.deinit();

    for (0..num_images) |_| {
        // The fields are read in the order in which they are stored in the file.
        const width = try reader.readByte();
        const height = try reader.readByte();
        const num_colors = try reader.readByte();
        const reserved = try reader.readByte();
        // Color planes + bits per pixel for icons, hotspot x + y for cursors.
        const type_specific_a = try reader.readIntLittle(u16);
        const type_specific_b = try reader.readIntLittle(u16);
        const data_size = try reader.readIntLittle(u32);
        const data_offset = try reader.readIntLittle(u32);

        // Validate that the offset/data size is feasible
        const data_end = @as(u64, data_offset) + data_size;
        if (data_end > max_size) return error.ImpossibleDataSize;

        // and that the data size is large enough for at least the header of an image
        // Note: This avoids needing to deal with a miscompilation from the Win32 RC
        //       compiler when the data size of an image is specified as zero but there
        //       is data to-be-read at the offset. The Win32 RC compiler will output
        //       an ICON/CURSOR resource with a bogus size in its header but with no actual
        //       data bytes in it, leading to an invalid .res. Similarly, if, for example,
        //       there is valid PNG data at the image's offset, but the size is specified
        //       as fewer bytes than the PNG header, then the Win32 RC compiler will still
        //       treat it as a PNG (e.g. unconditionally set num_planes to 1) but the data
        //       of the resource will only be 1 byte so treating it as a PNG doesn't make
        //       sense (especially not when you have to read past the data size to determine
        //       that it's a PNG).
        if (data_size < 16) return error.ImpossibleDataSize;

        var entry = Entry{
            .width = width,
            .height = height,
            .num_colors = num_colors,
            .reserved = reserved,
            .type_specific_data = undefined,
            .data_size_in_bytes = data_size,
            .data_offset_from_start_of_file = data_offset,
        };
        switch (image_type) {
            .icon => entry.type_specific_data = .{ .icon = .{
                .color_planes = type_specific_a,
                .bits_per_pixel = type_specific_b,
            } },
            .cursor => entry.type_specific_data = .{ .cursor = .{
                .hotspot_x = type_specific_a,
                .hotspot_y = type_specific_b,
            } },
        }
        try entries.append(entry);
    }

    return .{
        .image_type = image_type,
        .entries = try entries.toOwnedSlice(),
        .allocator = allocator,
    };
}
/// The kind of images a file contains, as stored in the file header.
pub const ImageType = enum(u16) {
    icon = 1,
    cursor = 2,
};

/// The parsed directory of an .ico/.cur file: the image type plus one `Entry` per image.
pub const IconDir = struct {
    image_type: ImageType,
    /// Note: entries.len will always fit into a u16, since the field containing the
    /// number of images in an ico file is a u16.
    entries: []Entry,
    /// The allocator that `entries` was allocated with.
    allocator: std.mem.Allocator,

    /// Frees `entries`.
    pub fn deinit(self: IconDir) void {
        self.allocator.free(self.entries);
    }

    /// Size in bytes of the header written by `writeResData` (three u16 values).
    pub const res_header_byte_len = 6;

    /// Number of bytes that `writeResData` writes for this directory.
    pub fn getResDataSize(self: IconDir) u32 {
        // maxInt(u16) * Entry.res_byte_len = 917,490 which is well within the u32 range.
        // Note: self.entries.len is limited to maxInt(u16)
        return @intCast(IconDir.res_header_byte_len + self.entries.len * Entry.res_byte_len);
    }

    /// Writes the directory in its resource form to `writer`: a zero u16, the image
    /// type, the number of entries, and then every entry.
    ///
    /// Entries get consecutive IDs starting at `first_image_id`; the ID of every entry
    /// must fit into a u16.
    pub fn writeResData(self: IconDir, writer: anytype, first_image_id: u16) !void {
        try writer.writeIntLittle(u16, 0);
        try writer.writeIntLittle(u16, @intFromEnum(self.image_type));
        // We know that entries.len must fit into a u16
        try writer.writeIntLittle(u16, @as(u16, @intCast(self.entries.len)));

        for (self.entries, 0..) |entry, i| {
            // The ID is computed per entry rather than by incrementing a counter after
            // each write, so that an entry with the ID maxInt(u16) can be the last one
            // without the never-used "next" ID overflowing.
            const image_id = first_image_id + @as(u16, @intCast(i));
            try entry.writeResData(writer, image_id);
        }
    }
};

/// One directory entry, describing a single image within the file.
pub const Entry = struct {
    // Icons are limited to u8 sizes, cursors can have u16,
    // so we store as u16 and truncate when needed.
    width: u16,
    height: u16,
    num_colors: u8,
    /// This should always be zero, but whatever value it is gets
    /// carried over so we need to store it
    reserved: u8,
    type_specific_data: union(ImageType) {
        icon: struct {
            color_planes: u16,
            bits_per_pixel: u16,
        },
        cursor: struct {
            hotspot_x: u16,
            hotspot_y: u16,
        },
    },
    data_size_in_bytes: u32,
    data_offset_from_start_of_file: u32,

    /// Number of bytes that `writeResData` writes per entry.
    pub const res_byte_len = 14;

    /// Writes this entry in its resource form to `writer`, ending with `id`.
    /// The layout of the fields before the ID depends on the image type.
    pub fn writeResData(self: Entry, writer: anytype, id: u16) !void {
        switch (self.type_specific_data) {
            .icon => |icon_data| {
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.width)));
                try writer.writeIntLittle(u8, @as(u8, @truncate(self.height)));
                try writer.writeIntLittle(u8, self.num_colors);
                try writer.writeIntLittle(u8, self.reserved);
                try writer.writeIntLittle(u16, icon_data.color_planes);
                try writer.writeIntLittle(u16, icon_data.bits_per_pixel);
                try writer.writeIntLittle(u32, self.data_size_in_bytes);
            },
            .cursor => |cursor_data| {
                try writer.writeIntLittle(u16, self.width);
                try writer.writeIntLittle(u16, self.height);
                try writer.writeIntLittle(u16, cursor_data.hotspot_x);
                try writer.writeIntLittle(u16, cursor_data.hotspot_y);
                // NOTE(review): the extra 4 bytes presumably account for the hotspot
                // that precedes the image data in a cursor resource; confirm against
                // the code that writes the cursor data itself.
                try writer.writeIntLittle(u32, self.data_size_in_bytes + 4);
            },
        }
        try writer.writeIntLittle(u16, id);
    }
};
test "icon" {
    // Directory header declaring one icon image, a single directory entry,
    // then 16 zero bytes standing in for the image data.
    const ico_bytes = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var stream = std.io.fixedBufferStream(ico_bytes);
    const dir = try read(std.testing.allocator, stream.reader(), ico_bytes.len);
    defer dir.deinit();

    try std.testing.expectEqual(ImageType.icon, dir.image_type);
    try std.testing.expectEqual(@as(usize, 1), dir.entries.len);
}
test "icon too many images" {
    // The header claims two images but only one directory entry is present.
    //
    // Because every data size is verified to be within the file bounds and >= 16,
    // and a RESDIR structure is itself 16 bytes long, looking for further RESDIR
    // structures can never run into EOF: ImpossibleDataSize is always hit first.
    const ico_bytes = "\x00\x00\x01\x00\x02\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var stream = std.io.fixedBufferStream(ico_bytes);
    const result = read(std.testing.allocator, stream.reader(), ico_bytes.len);
    try std.testing.expectError(error.ImpossibleDataSize, result);
}
test "icon data size past EOF" {
    // The entry's data size (0x110) is larger than what remains in the file.
    const ico_bytes = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x01\x00\x00\x16\x00\x00\x00" ++ [_]u8{0} ** 16;
    var stream = std.io.fixedBufferStream(ico_bytes);
    const result = read(std.testing.allocator, stream.reader(), ico_bytes.len);
    try std.testing.expectError(error.ImpossibleDataSize, result);
}
test "icon data offset past EOF" {
    // The entry's data offset (0x17) is one byte too far for its 16 bytes of
    // data to fit inside the file.
    const ico_bytes = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x10\x00\x00\x00\x17\x00\x00\x00" ++ [_]u8{0} ** 16;
    var stream = std.io.fixedBufferStream(ico_bytes);
    const result = read(std.testing.allocator, stream.reader(), ico_bytes.len);
    try std.testing.expectError(error.ImpossibleDataSize, result);
}
test "icon data size too small" {
    // The entry's data size is 0x0F, one byte below the 16-byte minimum.
    const ico_bytes = "\x00\x00\x01\x00\x01\x00\x10\x10\x00\x00\x01\x00\x10\x00\x0F\x00\x00\x00\x16\x00\x00\x00";
    var stream = std.io.fixedBufferStream(ico_bytes);
    const result = read(std.testing.allocator, stream.reader(), ico_bytes.len);
    try std.testing.expectError(error.ImpossibleDataSize, result);
}
/// Image data format, determined from the first 16 bytes of the image data.
pub const ImageFormat = enum {
    dib,
    png,
    riff,

    // The magic values and the header bytes are both read with native
    // endianness, so the comparisons below do not depend on byte order.
    const riff_header = std.mem.readIntNative(u32, "RIFF");
    const png_signature = std.mem.readIntNative(u64, "\x89PNG\r\n\x1a\n");
    const ihdr_code = std.mem.readIntNative(u32, "IHDR");
    const acon_form_type = std.mem.readIntNative(u32, "ACON");

    /// Guesses the format from the leading magic bytes; anything that is
    /// neither RIFF nor PNG is reported as `.dib`.
    pub fn detect(header_bytes: *const [16]u8) ImageFormat {
        const leading_u32 = std.mem.readIntNative(u32, header_bytes[0..4]);
        if (leading_u32 == riff_header) return .riff;

        const leading_u64 = std.mem.readIntNative(u64, header_bytes[0..8]);
        return if (leading_u64 == png_signature) .png else .dib;
    }

    /// Checks the secondary marker expected for `format`: `IHDR` at bytes 12..16
    /// for PNG, `ACON` at bytes 8..12 for RIFF. `.dib` is always accepted.
    pub fn validate(format: ImageFormat, header_bytes: *const [16]u8) bool {
        switch (format) {
            .dib => return true,
            .riff => {
                const form_type = std.mem.readIntNative(u32, header_bytes[8..12]);
                return form_type == acon_form_type;
            },
            .png => {
                const chunk_type = std.mem.readIntNative(u32, header_bytes[12..16]);
                return chunk_type == ihdr_code;
            },
        }
    }
};
/// Subset of BITMAPINFOHEADER (WinGDI.h) limited to the fields that are both:
/// - relevant to what we need, and
/// - shared between all versions of BITMAPINFOHEADER (V4, V5).
pub const BitmapHeader = extern struct {
    bcSize: u32,
    bcWidth: i32,
    bcHeight: i32,
    bcPlanes: u16,
    bcBitCount: u16,

    /// Classifies this header by its `bcSize` field.
    pub fn version(self: *const BitmapHeader) Version {
        return Version.get(self.bcSize);
    }

    /// https://en.wikipedia.org/wiki/BMP_file_format#DIB_header_(bitmap_information_header)
    pub const Version = enum {
        unknown,
        @"win2.0", // Windows 2.0 or later
        @"nt3.1", // Windows NT, 3.1x or later
        @"nt4.0", // Windows NT 4.0, 95 or later
        @"nt5.0", // Windows NT 5.0, 98 or later

        /// Maps a header size in bytes to the version with exactly that size,
        /// or `.unknown` if no version matches.
        pub fn get(header_size: u32) Version {
            const known_versions = [_]Version{ .@"win2.0", .@"nt3.1", .@"nt4.0", .@"nt5.0" };
            inline for (known_versions) |candidate| {
                if (header_size == len(candidate)) return candidate;
            }
            return .unknown;
        }

        /// Header size in bytes of a known version. Must not be called with `.unknown`.
        pub fn len(comptime v: Version) comptime_int {
            switch (v) {
                .unknown => unreachable,
                .@"win2.0" => return 12,
                .@"nt3.1" => return 40,
                .@"nt4.0" => return 108,
                .@"nt5.0" => return 124,
            }
        }

        /// Human-readable name of the version for use in error messages.
        pub fn nameForErrorDisplay(v: Version) []const u8 {
            switch (v) {
                .@"win2.0" => return "Windows 2.0 (BITMAPCOREHEADER)",
                .@"nt3.1" => return "Windows NT, 3.1x (BITMAPINFOHEADER)",
                .@"nt4.0" => return "Windows NT 4.0, 95 (BITMAPV4HEADER)",
                .@"nt5.0" => return "Windows NT 5.0, 98 (BITMAPV5HEADER)",
                .unknown => return "unknown",
            }
        }
    };
};

877
src/resinator/lang.zig Normal file
View File

@ -0,0 +1,877 @@
const std = @import("std");
/// This function is specific to how the Win32 RC command line interprets
/// language IDs specified as integers.
/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
/// - Wraps on overflow of u16
/// - Stops parsing on any invalid hexadecimal digits
/// - Errors if a digit is not the first char
/// - `-` (negative) prefix is allowed
///
/// Returns `error.InvalidLanguageId` for an empty string, or when the first
/// character after the optional `-` and `0x` prefixes is not a hexadecimal digit.
/// A string consisting of only `-` contains no characters to reject and yields 0.
pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
    // An empty string has no first character to inspect; reject it rather than
    // indexing out of bounds.
    if (str.len == 0) return error.InvalidLanguageId;

    const radix: u8 = 16;
    const is_negative = str[0] == '-';
    var digits = if (is_negative) str[1..] else str;

    // The prefix is only stripped if at least one character follows it, so a
    // bare "0x" parses as the digit 0 followed by an ignored 'x'.
    if (digits.len > 2 and digits[0] == '0' and digits[1] == 'x') {
        digits = digits[2..];
    }

    var result: u16 = 0;
    for (digits, 0..) |c, i| {
        const digit = std.fmt.charToDigit(c, radix) catch {
            // First digit must be valid
            if (i == 0) return error.InvalidLanguageId;
            // On any later invalid digit, just stop parsing but don't fail
            break;
        };
        result = result *% radix +% digit;
    }

    return if (is_negative) 0 -% result else result;
}
test parseInt {
    // Inputs that parse successfully, including wrapping and early-stop cases.
    const valid_cases = [_]struct { input: []const u8, expected: u16 }{
        .{ .input = "16", .expected = 0x16 },
        .{ .input = "0x1A", .expected = 0x1a },
        .{ .input = "0x1Azzzz", .expected = 0x1a },
        .{ .input = "-1", .expected = 0xffff },
        .{ .input = "-0x16", .expected = 0xffea },
        .{ .input = "0o100", .expected = 0x0 },
        .{ .input = "10001", .expected = 0x1 },
    };
    for (valid_cases) |case| {
        try std.testing.expectEqual(case.expected, try parseInt(case.input));
    }

    // Inputs whose first character (after any accepted prefix) is not a hex digit.
    const invalid_inputs = [_][]const u8{ "--1", "0xha", "¹", "~1" };
    for (invalid_inputs) |input| {
        try std.testing.expectError(error.InvalidLanguageId, parseInt(input));
    }
}
/// Converts a language tag to its numeric ID the way the Win32 RC command line
/// does: invalid tags are rejected, while tags that are valid enough but have
/// no specific assigned ID are converted to LOCALE_CUSTOM_UNSPECIFIED.
pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
    return @intFromEnum(id);
}
/// Looks up the `LanguageId` assigned to `tag`.
/// Returns `null` when the tag is well-formed but has no assigned ID, and
/// `error.InvalidLanguageTag` when `parse` rejects the tag.
pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
    const parsed = try parse(tag);
    // There are currently no language tags with assigned IDs that have
    // multiple suffixes, so we can skip the lookup.
    if (parsed.multiple_suffixes) return null;

    const longest_known_tag = comptime blk: {
        var len = 0;
        for (@typeInfo(LanguageId).Enum.fields) |field| {
            if (field.name.len > len) len = field.name.len;
        }
        break :blk len;
    };
    // If the tag is longer than the longest tag that has an assigned ID,
    // then we can skip the lookup.
    if (tag.len > longest_known_tag) return null;

    var normalized_buf: [longest_known_tag]u8 = undefined;
    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
    // omit the suffix (and its separator), but only if the tag contains a
    // valid alternate sort order.
    const tag_to_normalize = if (parsed.isSuffixValidSortOrder())
        tag[0 .. tag.len - (parsed.suffix.?.len + 1)]
    else
        tag;
    const normalized_tag = normalizeTag(tag_to_normalize, &normalized_buf);

    if (std.meta.stringToEnum(LanguageId, normalized_tag)) |id| return id;

    // special case for a tag that has been mapped to the same ID
    // twice.
    if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) {
        return LanguageId.ff_ng;
    }
    return null;
}
test tagToId {
    const cases = [_]struct { tag: []const u8, expected: LanguageId }{
        .{ .tag = "ar-ae", .expected = LanguageId.ar_ae },
        .{ .tag = "AR_AE", .expected = LanguageId.ar_ae },
        .{ .tag = "ff-ng", .expected = LanguageId.ff_ng },
        // Special case
        .{ .tag = "ff-Latn-NG", .expected = LanguageId.ff_ng },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.expected, (try tagToId(case.tag)).?);
    }
}
test "exhaustive tagToId" {
// Every enum field name, used directly as a tag, must map back to its own ID.
inline for (@typeInfo(LanguageId).Enum.fields) |field| {
const id = tagToId(field.name) catch |err| {
std.debug.print("tag: {s}\n", .{field.name});
return err;
};
try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
std.debug.print("tag: {s}, got null\n", .{field.name});
return error.TestExpectedEqual;
});
}
// Every alternate sort, written as `lang-country-suffix`, must map to the
// ID of the corresponding `lang_country` field.
var buf: [32]u8 = undefined;
inline for (valid_alternate_sorts) |parsed_sort| {
var fbs = std.io.fixedBufferStream(&buf);
const writer = fbs.writer();
writer.writeAll(parsed_sort.language_code) catch unreachable;
writer.writeAll("-") catch unreachable;
writer.writeAll(parsed_sort.country_code.?) catch unreachable;
writer.writeAll("-") catch unreachable;
writer.writeAll(parsed_sort.suffix.?) catch unreachable;
// Build the expected field name at comptime. The fixed 5-byte buffer
// relies on both the language and country codes being 2 characters long.
const expected_field_name = comptime field: {
var name_buf: [5]u8 = undefined;
std.mem.copy(u8, &name_buf, parsed_sort.language_code);
name_buf[2] = '_';
std.mem.copy(u8, name_buf[3..], parsed_sort.country_code.?);
break :field name_buf;
};
const expected = @field(LanguageId, &expected_field_name);
const id = tagToId(fbs.getWritten()) catch |err| {
std.debug.print("tag: {s}\n", .{fbs.getWritten()});
return err;
};
try std.testing.expectEqual(expected, id orelse {
std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected });
return error.TestExpectedEqual;
});
}
}
/// Writes a normalized copy of `tag` into `buf` (every `-` becomes `_`, all
/// other bytes are ASCII-lowercased) and returns the written portion of `buf`.
/// Asserts that `buf` is at least as long as `tag`.
fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
    std.debug.assert(buf.len >= tag.len);
    const normalized = buf[0..tag.len];
    for (normalized, tag) |*dest, c| {
        dest.* = if (c == '-') '_' else std.ascii.toLower(c);
    }
    return normalized;
}
/// Language ID returned by `tagToInt` for valid tags that have no assigned ID.
///
/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
/// "When an LCID is requested for a locale without a
/// permanent LCID assignment, nor a temporary
/// assignment as above, the protocol will respond
/// with LOCALE_CUSTOM_UNSPECIFIED for all such
/// locales. Because this single value is used for
/// numerous possible locale names, it is impossible to
/// round trip this locale, even temporarily.
/// Applications should discard this value as soon as
/// possible and never persist it. If the system is
/// forced to respond to a request for
/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
/// the current user locale. This is often incorrect but
/// may prevent an application or component from
/// failing. As the meaning of this temporary LCID is
/// unstable, it should never be used for interchange
/// or persisted data. This is a 1-to-many relationship
/// that is very unstable."
pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;
/// Primary language ID of English: the low 10 bits of e.g. `en_us` (0x0409).
pub const LANG_ENGLISH = 0x09;
/// Sublanguage ID of US English: the high 6 bits of `en_us` (0x0409).
pub const SUBLANG_ENGLISH_US = 0x01;
/// Combines a primary language ID and a sublanguage ID into a 16-bit language ID.
/// The primary language occupies the low 10 bits and the sublanguage the high
/// 6 bits, e.g. primary 0x09 (English) with sublanguage 0x01 (US) gives 0x0409.
/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
    // It is the sublanguage that gets shifted; shifting the 10-bit primary ID
    // left by 10 would push most of its bits out of the u16.
    return (@as(u16, sublang) << 10) | primary;
}
/// Splits `lang_tag` into its language code, script tag, country code and
/// suffix parts. The slices in the returned `Parsed` point into `lang_tag`.
/// Returns `error.InvalidLanguageTag` if the tag does not follow the format below.
///
/// Language tag format expressed as a regular expression (rough approximation):
///
/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
/// lang | script | country | suffix
///
/// Notes:
/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
/// parsing rules (e.g. `a-0` and `a-00000000` are allowed).
/// - There can also be any number of trailing suffix parts as long as they each
/// would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
/// - When doing lookups, trailing suffix parts are taken into account, e.g.
/// `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
/// - A suffix is only allowed if:
/// + Lang code is 1 char long, or
/// + A country code is present, or
/// + A script tag is not present and:
/// - the suffix is numeric-only and has a length of 3, or
/// - the lang is `qps` and the suffix is `ploca` or `plocm`
pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
// Parts may be separated by either `-` or `_`.
var it = std.mem.splitAny(u8, lang_tag, "-_");
const lang_code = it.first();
const is_valid_lang_code = lang_code.len >= 1 and lang_code.len <= 3 and isAllAlphabetic(lang_code);
if (!is_valid_lang_code) return error.InvalidLanguageTag;
var parsed = Parsed{
.language_code = lang_code,
};
// The second part could be a script tag, a country code, or a suffix
if (it.next()) |part_str| {
// The lang code being length 1 behaves strangely, so fully special case it.
if (lang_code.len == 1) {
// This is almost certainly not the 'right' way to do this, but I don't have a method
// to determine how exactly these language tags are parsed, and it seems like
// suffix parsing rules apply generally (digits allowed, length of 1 to 8).
//
// However, because we want to be able to lookup `x-iv-mathan` normally without
// `multiple_suffixes` being set to true, we need to make sure to treat two-length
// alphabetic parts as a country code.
if (part_str.len == 2 and isAllAlphabetic(part_str)) {
parsed.country_code = part_str;
}
// Everything else, though, we can just throw into the suffix as long as the normal
// rules apply.
else if (part_str.len > 0 and part_str.len <= 8 and isAllAlphanumeric(part_str)) {
parsed.suffix = part_str;
} else {
return error.InvalidLanguageTag;
}
} else if (part_str.len == 4 and isAllAlphabetic(part_str)) {
parsed.script_tag = part_str;
} else if (part_str.len == 2 and isAllAlphabetic(part_str)) {
parsed.country_code = part_str;
}
// Only a 3-len numeric suffix is allowed as the second part of a tag
else if (part_str.len == 3 and isAllNumeric(part_str)) {
parsed.suffix = part_str;
}
// Special case for qps-ploca and qps-plocm
else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and
(std.ascii.eqlIgnoreCase(part_str, "ploca") or
std.ascii.eqlIgnoreCase(part_str, "plocm")))
{
parsed.suffix = part_str;
} else {
return error.InvalidLanguageTag;
}
} else {
// If there's no part besides a 1-len lang code, then it is malformed
if (lang_code.len == 1) return error.InvalidLanguageTag;
return parsed;
}
// A script tag may only be followed by a country code (or by nothing at all).
if (parsed.script_tag != null) {
if (it.next()) |part_str| {
if (part_str.len == 2 and isAllAlphabetic(part_str)) {
parsed.country_code = part_str;
} else {
// Suffix is not allowed when a country code is not present.
return error.InvalidLanguageTag;
}
} else {
return parsed;
}
}
// We've now parsed any potential script tag/country codes, so anything remaining
// is a suffix
while (it.next()) |part_str| {
if (part_str.len == 0 or part_str.len > 8 or !isAllAlphanumeric(part_str)) {
return error.InvalidLanguageTag;
}
// Only the first suffix is stored; any further one just sets the flag.
if (parsed.suffix == null) {
parsed.suffix = part_str;
} else {
// In theory we could return early here but we still want to validate
// that each part is a valid suffix all the way to the end, e.g.
// we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!`
// suffix part.
parsed.multiple_suffixes = true;
}
}
return parsed;
}
/// The component parts of a language tag.
pub const Parsed = struct {
    language_code: []const u8,
    script_tag: ?[]const u8 = null,
    country_code: ?[]const u8 = null,
    /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
    suffix: ?[]const u8 = null,
    /// Any number of suffixes may follow the first one. Their values do not
    /// matter; we only need to know whether any exist, so that e.g.
    /// `ca-es-valencia-blah` can be seen as different from `ca-es-valencia`.
    /// Storing this as a bool avoids needing either (a) dynamic allocation or
    /// (b) a limit to the number of suffixes allowed when parsing.
    multiple_suffixes: bool = false,

    /// Returns true if this tag has a country code, exactly one suffix and no
    /// script tag, and the language/country/suffix combination matches an entry
    /// of `valid_alternate_sorts` (compared ASCII case-insensitively).
    pub fn isSuffixValidSortOrder(self: Parsed) bool {
        const country = self.country_code orelse return false;
        const sort_suffix = self.suffix orelse return false;
        if (self.script_tag != null or self.multiple_suffixes) return false;

        for (valid_alternate_sorts) |known| {
            const is_match = std.ascii.eqlIgnoreCase(known.language_code, self.language_code) and
                std.ascii.eqlIgnoreCase(known.country_code.?, country) and
                std.ascii.eqlIgnoreCase(known.suffix.?, sort_suffix);
            if (is_match) return true;
        }
        return false;
    }
};
/// Language/country/suffix combinations whose suffix is an alternate sort order.
/// Every entry sets `language_code`, `country_code` and `suffix`.
///
/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
const valid_alternate_sorts = [_]Parsed{
// Note: x-IV-mathan is omitted due to how lookups are implemented.
// This table is used to make e.g. `de-de_phoneb` get looked up
// as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
// instead needs to be looked up with the suffix included because
// `x-iv` is not a tag with an assigned ID.
.{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
.{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
.{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
.{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
.{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
.{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
.{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
.{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
.{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
.{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
.{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
.{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
.{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
};
test "parse" {
    // Tags that parse successfully, paired with the expected parts.
    const valid_cases = [_]struct { tag: []const u8, expected: Parsed }{
        .{ .tag = "en", .expected = .{ .language_code = "en" } },
        .{ .tag = "en-us", .expected = .{ .language_code = "en", .country_code = "us" } },
        .{ .tag = "en-123", .expected = .{ .language_code = "en", .suffix = "123" } },
        .{ .tag = "en-123-blah", .expected = .{
            .language_code = "en",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "en-us_123-blah", .expected = .{
            .language_code = "en",
            .country_code = "us",
            .suffix = "123",
            .multiple_suffixes = true,
        } },
        .{ .tag = "eng-Latn", .expected = .{ .language_code = "eng", .script_tag = "Latn" } },
        // Same tag with `_` as the separator (this slot previously repeated
        // the `eng-Latn` case verbatim).
        .{ .tag = "eng_Latn", .expected = .{ .language_code = "eng", .script_tag = "Latn" } },
        .{ .tag = "ff-Latn-NG", .expected = .{
            .language_code = "ff",
            .script_tag = "Latn",
            .country_code = "NG",
        } },
        .{ .tag = "qps-Plocm", .expected = .{ .language_code = "qps", .suffix = "Plocm" } },
        .{ .tag = "qps-ploca", .expected = .{ .language_code = "qps", .suffix = "ploca" } },
        .{ .tag = "x-IV-mathan", .expected = .{
            .language_code = "x",
            .country_code = "IV",
            .suffix = "mathan",
        } },
        .{ .tag = "a-a", .expected = .{ .language_code = "a", .suffix = "a" } },
        .{ .tag = "a-000", .expected = .{ .language_code = "a", .suffix = "000" } },
        .{ .tag = "a-00000000", .expected = .{ .language_code = "a", .suffix = "00000000" } },
    };
    for (valid_cases) |case| {
        try std.testing.expectEqualDeep(case.expected, try parse(case.tag));
    }

    const invalid_tags = [_][]const u8{
        // suffix not allowed if script tag is present without country code
        "eng-Latn-suffix",
        // suffix must be 3 numeric digits if neither script tag nor country code is present
        "eng-suffix",
        "en-plocm",
        // 1-len lang code is not allowed if it's the only part
        "e",
    };
    for (invalid_tags) |tag| {
        try std.testing.expectError(error.InvalidLanguageTag, parse(tag));
    }
}
/// Returns true if every byte of `str` is an ASCII letter (true for an empty string).
fn isAllAlphabetic(str: []const u8) bool {
    return for (str) |c| {
        if (!std.ascii.isAlphabetic(c)) break false;
    } else true;
}
/// Returns true if every byte of `str` is an ASCII letter or digit (true for an empty string).
fn isAllAlphanumeric(str: []const u8) bool {
    return for (str) |c| {
        if (!std.ascii.isAlphanumeric(c)) break false;
    } else true;
}
/// Returns true if every byte of `str` is an ASCII digit (true for an empty string).
fn isAllNumeric(str: []const u8) bool {
    return for (str) |c| {
        if (!std.ascii.isDigit(c)) break false;
    } else true;
}
/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
/// - Protocol Revision: 15.0
/// - Language / Language ID / Language Tag table in Appendix A
/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED)
/// - Normalized each language tag (lowercased, replaced all `-` with `_`)
/// - There is one special case where two tags are mapped to the same ID, the following
/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467.
/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria
/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts
/// table as 0x007F (LANG_INVARIANT).
pub const LanguageId = enum(u16) {
// Language tag = Language ID, // Language, Location (or type)
af = 0x0036, // Afrikaans
af_za = 0x0436, // Afrikaans, South Africa
sq = 0x001C, // Albanian
sq_al = 0x041C, // Albanian, Albania
gsw = 0x0084, // Alsatian
gsw_fr = 0x0484, // Alsatian, France
am = 0x005E, // Amharic
am_et = 0x045E, // Amharic, Ethiopia
ar = 0x0001, // Arabic
ar_dz = 0x1401, // Arabic, Algeria
ar_bh = 0x3C01, // Arabic, Bahrain
ar_eg = 0x0c01, // Arabic, Egypt
ar_iq = 0x0801, // Arabic, Iraq
ar_jo = 0x2C01, // Arabic, Jordan
ar_kw = 0x3401, // Arabic, Kuwait
ar_lb = 0x3001, // Arabic, Lebanon
ar_ly = 0x1001, // Arabic, Libya
ar_ma = 0x1801, // Arabic, Morocco
ar_om = 0x2001, // Arabic, Oman
ar_qa = 0x4001, // Arabic, Qatar
ar_sa = 0x0401, // Arabic, Saudi Arabia
ar_sy = 0x2801, // Arabic, Syria
ar_tn = 0x1C01, // Arabic, Tunisia
ar_ae = 0x3801, // Arabic, U.A.E.
ar_ye = 0x2401, // Arabic, Yemen
hy = 0x002B, // Armenian
hy_am = 0x042B, // Armenian, Armenia
as = 0x004D, // Assamese
as_in = 0x044D, // Assamese, India
az_cyrl = 0x742C, // Azerbaijani (Cyrillic)
az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan
az = 0x002C, // Azerbaijani (Latin)
az_latn = 0x782C, // Azerbaijani (Latin)
az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan
bn = 0x0045, // Bangla
bn_bd = 0x0845, // Bangla, Bangladesh
bn_in = 0x0445, // Bangla, India
ba = 0x006D, // Bashkir
ba_ru = 0x046D, // Bashkir, Russia
eu = 0x002D, // Basque
eu_es = 0x042D, // Basque, Spain
be = 0x0023, // Belarusian
be_by = 0x0423, // Belarusian, Belarus
bs_cyrl = 0x641A, // Bosnian (Cyrillic)
bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina
bs_latn = 0x681A, // Bosnian (Latin)
bs = 0x781A, // Bosnian (Latin)
bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina
br = 0x007E, // Breton
br_fr = 0x047E, // Breton, France
bg = 0x0002, // Bulgarian
bg_bg = 0x0402, // Bulgarian, Bulgaria
my = 0x0055, // Burmese
my_mm = 0x0455, // Burmese, Myanmar
ca = 0x0003, // Catalan
ca_es = 0x0403, // Catalan, Spain
tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco
ku = 0x0092, // Central Kurdish
ku_arab = 0x7c92, // Central Kurdish
ku_arab_iq = 0x0492, // Central Kurdish, Iraq
chr = 0x005C, // Cherokee
chr_cher = 0x7c5C, // Cherokee
chr_cher_us = 0x045C, // Cherokee, United States
zh_hans = 0x0004, // Chinese (Simplified)
zh = 0x7804, // Chinese (Simplified)
zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China
zh_sg = 0x1004, // Chinese (Simplified), Singapore
zh_hant = 0x7C04, // Chinese (Traditional)
zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R.
zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R.
zh_tw = 0x0404, // Chinese (Traditional), Taiwan
co = 0x0083, // Corsican
co_fr = 0x0483, // Corsican, France
hr = 0x001A, // Croatian
hr_hr = 0x041A, // Croatian, Croatia
hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina
cs = 0x0005, // Czech
cs_cz = 0x0405, // Czech, Czech Republic
da = 0x0006, // Danish
da_dk = 0x0406, // Danish, Denmark
prs = 0x008C, // Dari
prs_af = 0x048C, // Dari, Afghanistan
dv = 0x0065, // Divehi
dv_mv = 0x0465, // Divehi, Maldives
nl = 0x0013, // Dutch
nl_be = 0x0813, // Dutch, Belgium
nl_nl = 0x0413, // Dutch, Netherlands
dz_bt = 0x0C51, // Dzongkha, Bhutan
en = 0x0009, // English
en_au = 0x0C09, // English, Australia
en_bz = 0x2809, // English, Belize
en_ca = 0x1009, // English, Canada
en_029 = 0x2409, // English, Caribbean
en_hk = 0x3C09, // English, Hong Kong
en_in = 0x4009, // English, India
en_ie = 0x1809, // English, Ireland
en_jm = 0x2009, // English, Jamaica
en_my = 0x4409, // English, Malaysia
en_nz = 0x1409, // English, New Zealand
en_ph = 0x3409, // English, Republic of the Philippines
en_sg = 0x4809, // English, Singapore
en_za = 0x1C09, // English, South Africa
en_tt = 0x2c09, // English, Trinidad and Tobago
en_ae = 0x4C09, // English, United Arab Emirates
en_gb = 0x0809, // English, United Kingdom
en_us = 0x0409, // English, United States
en_zw = 0x3009, // English, Zimbabwe
et = 0x0025, // Estonian
et_ee = 0x0425, // Estonian, Estonia
fo = 0x0038, // Faroese
fo_fo = 0x0438, // Faroese, Faroe Islands
fil = 0x0064, // Filipino
fil_ph = 0x0464, // Filipino, Philippines
fi = 0x000B, // Finnish
fi_fi = 0x040B, // Finnish, Finland
fr = 0x000C, // French
fr_be = 0x080C, // French, Belgium
fr_cm = 0x2c0C, // French, Cameroon
fr_ca = 0x0c0C, // French, Canada
fr_029 = 0x1C0C, // French, Caribbean
fr_cd = 0x240C, // French, Congo, DRC
fr_ci = 0x300C, // French, Côte d'Ivoire
fr_fr = 0x040C, // French, France
fr_ht = 0x3c0C, // French, Haiti
fr_lu = 0x140C, // French, Luxembourg
fr_ml = 0x340C, // French, Mali
fr_ma = 0x380C, // French, Morocco
fr_mc = 0x180C, // French, Principality of Monaco
fr_re = 0x200C, // French, Reunion
fr_sn = 0x280C, // French, Senegal
fr_ch = 0x100C, // French, Switzerland
fy = 0x0062, // Frisian
fy_nl = 0x0462, // Frisian, Netherlands
ff = 0x0067, // Fulah
ff_latn = 0x7C67, // Fulah (Latin)
ff_ng = 0x0467, // Fulah, Nigeria
ff_latn_sn = 0x0867, // Fulah, Senegal
gl = 0x0056, // Galician
gl_es = 0x0456, // Galician, Spain
ka = 0x0037, // Georgian
ka_ge = 0x0437, // Georgian, Georgia
de = 0x0007, // German
de_at = 0x0C07, // German, Austria
de_de = 0x0407, // German, Germany
de_li = 0x1407, // German, Liechtenstein
de_lu = 0x1007, // German, Luxembourg
de_ch = 0x0807, // German, Switzerland
el = 0x0008, // Greek
el_gr = 0x0408, // Greek, Greece
kl = 0x006F, // Greenlandic
kl_gl = 0x046F, // Greenlandic, Greenland
gn = 0x0074, // Guarani
gn_py = 0x0474, // Guarani, Paraguay
gu = 0x0047, // Gujarati
gu_in = 0x0447, // Gujarati, India
ha = 0x0068, // Hausa (Latin)
ha_latn = 0x7C68, // Hausa (Latin)
ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria
haw = 0x0075, // Hawaiian
haw_us = 0x0475, // Hawaiian, United States
he = 0x000D, // Hebrew
he_il = 0x040D, // Hebrew, Israel
hi = 0x0039, // Hindi
hi_in = 0x0439, // Hindi, India
hu = 0x000E, // Hungarian
hu_hu = 0x040E, // Hungarian, Hungary
is = 0x000F, // Icelandic
is_is = 0x040F, // Icelandic, Iceland
ig = 0x0070, // Igbo
ig_ng = 0x0470, // Igbo, Nigeria
id = 0x0021, // Indonesian
id_id = 0x0421, // Indonesian, Indonesia
iu = 0x005D, // Inuktitut (Latin)
iu_latn = 0x7C5D, // Inuktitut (Latin)
iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada
iu_cans = 0x785D, // Inuktitut (Syllabics)
iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada
ga = 0x003C, // Irish
ga_ie = 0x083C, // Irish, Ireland
it = 0x0010, // Italian
it_it = 0x0410, // Italian, Italy
it_ch = 0x0810, // Italian, Switzerland
ja = 0x0011, // Japanese
ja_jp = 0x0411, // Japanese, Japan
kn = 0x004B, // Kannada
kn_in = 0x044B, // Kannada, India
kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria
ks = 0x0060, // Kashmiri
ks_arab = 0x0460, // Kashmiri, Perso-Arabic
ks_deva_in = 0x0860, // Kashmiri (Devanagari), India
kk = 0x003F, // Kazakh
kk_kz = 0x043F, // Kazakh, Kazakhstan
km = 0x0053, // Khmer
km_kh = 0x0453, // Khmer, Cambodia
quc = 0x0086, // K'iche
quc_latn_gt = 0x0486, // K'iche, Guatemala
rw = 0x0087, // Kinyarwanda
rw_rw = 0x0487, // Kinyarwanda, Rwanda
sw = 0x0041, // Kiswahili
sw_ke = 0x0441, // Kiswahili, Kenya
kok = 0x0057, // Konkani
kok_in = 0x0457, // Konkani, India
ko = 0x0012, // Korean
ko_kr = 0x0412, // Korean, Korea
ky = 0x0040, // Kyrgyz
ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan
lo = 0x0054, // Lao
lo_la = 0x0454, // Lao, Lao P.D.R.
la_va = 0x0476, // Latin, Vatican City
lv = 0x0026, // Latvian
lv_lv = 0x0426, // Latvian, Latvia
lt = 0x0027, // Lithuanian
lt_lt = 0x0427, // Lithuanian, Lithuania
dsb = 0x7C2E, // Lower Sorbian
dsb_de = 0x082E, // Lower Sorbian, Germany
lb = 0x006E, // Luxembourgish
lb_lu = 0x046E, // Luxembourgish, Luxembourg
mk = 0x002F, // Macedonian
mk_mk = 0x042F, // Macedonian, North Macedonia
ms = 0x003E, // Malay
ms_bn = 0x083E, // Malay, Brunei Darussalam
ms_my = 0x043E, // Malay, Malaysia
ml = 0x004C, // Malayalam
ml_in = 0x044C, // Malayalam, India
mt = 0x003A, // Maltese
mt_mt = 0x043A, // Maltese, Malta
mi = 0x0081, // Maori
mi_nz = 0x0481, // Maori, New Zealand
arn = 0x007A, // Mapudungun
arn_cl = 0x047A, // Mapudungun, Chile
mr = 0x004E, // Marathi
mr_in = 0x044E, // Marathi, India
moh = 0x007C, // Mohawk
moh_ca = 0x047C, // Mohawk, Canada
mn = 0x0050, // Mongolian (Cyrillic)
mn_cyrl = 0x7850, // Mongolian (Cyrillic)
mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia
mn_mong = 0x7C50, // Mongolian (Traditional Mongolian)
mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China
mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia
ne = 0x0061, // Nepali
ne_in = 0x0861, // Nepali, India
ne_np = 0x0461, // Nepali, Nepal
no = 0x0014, // Norwegian (Bokmal)
nb = 0x7C14, // Norwegian (Bokmal)
nb_no = 0x0414, // Norwegian (Bokmal), Norway
nn = 0x7814, // Norwegian (Nynorsk)
nn_no = 0x0814, // Norwegian (Nynorsk), Norway
oc = 0x0082, // Occitan
oc_fr = 0x0482, // Occitan, France
@"or" = 0x0048, // Odia
or_in = 0x0448, // Odia, India
om = 0x0072, // Oromo
om_et = 0x0472, // Oromo, Ethiopia
ps = 0x0063, // Pashto
ps_af = 0x0463, // Pashto, Afghanistan
fa = 0x0029, // Persian
fa_ir = 0x0429, // Persian, Iran
pl = 0x0015, // Polish
pl_pl = 0x0415, // Polish, Poland
pt = 0x0016, // Portuguese
pt_br = 0x0416, // Portuguese, Brazil
pt_pt = 0x0816, // Portuguese, Portugal
qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing
qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing
qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales
pa = 0x0046, // Punjabi
pa_arab = 0x7C46, // Punjabi
pa_in = 0x0446, // Punjabi, India
pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan
quz = 0x006B, // Quechua
quz_bo = 0x046B, // Quechua, Bolivia
quz_ec = 0x086B, // Quechua, Ecuador
quz_pe = 0x0C6B, // Quechua, Peru
ro = 0x0018, // Romanian
ro_md = 0x0818, // Romanian, Moldova
ro_ro = 0x0418, // Romanian, Romania
rm = 0x0017, // Romansh
rm_ch = 0x0417, // Romansh, Switzerland
ru = 0x0019, // Russian
ru_md = 0x0819, // Russian, Moldova
ru_ru = 0x0419, // Russian, Russia
sah = 0x0085, // Sakha
sah_ru = 0x0485, // Sakha, Russia
smn = 0x703B, // Sami (Inari)
smn_fi = 0x243B, // Sami (Inari), Finland
smj = 0x7C3B, // Sami (Lule)
smj_no = 0x103B, // Sami (Lule), Norway
smj_se = 0x143B, // Sami (Lule), Sweden
se = 0x003B, // Sami (Northern)
se_fi = 0x0C3B, // Sami (Northern), Finland
se_no = 0x043B, // Sami (Northern), Norway
se_se = 0x083B, // Sami (Northern), Sweden
sms = 0x743B, // Sami (Skolt)
sms_fi = 0x203B, // Sami (Skolt), Finland
sma = 0x783B, // Sami (Southern)
sma_no = 0x183B, // Sami (Southern), Norway
sma_se = 0x1C3B, // Sami (Southern), Sweden
sa = 0x004F, // Sanskrit
sa_in = 0x044F, // Sanskrit, India
gd = 0x0091, // Scottish Gaelic
gd_gb = 0x0491, // Scottish Gaelic, United Kingdom
sr_cyrl = 0x6C1A, // Serbian (Cyrillic)
sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina
sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro
sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia
sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former)
sr_latn = 0x701A, // Serbian (Latin)
sr = 0x7C1A, // Serbian (Latin)
sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina
sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro
sr_latn_rs = 0x241A, // Serbian (Latin), Serbia
sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former)
nso = 0x006C, // Sesotho sa Leboa
nso_za = 0x046C, // Sesotho sa Leboa, South Africa
tn = 0x0032, // Setswana
tn_bw = 0x0832, // Setswana, Botswana
tn_za = 0x0432, // Setswana, South Africa
sd = 0x0059, // Sindhi
sd_arab = 0x7C59, // Sindhi
sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan
si = 0x005B, // Sinhala
si_lk = 0x045B, // Sinhala, Sri Lanka
sk = 0x001B, // Slovak
sk_sk = 0x041B, // Slovak, Slovakia
sl = 0x0024, // Slovenian
sl_si = 0x0424, // Slovenian, Slovenia
so = 0x0077, // Somali
so_so = 0x0477, // Somali, Somalia
st = 0x0030, // Sotho
st_za = 0x0430, // Sotho, South Africa
es = 0x000A, // Spanish
es_ar = 0x2C0A, // Spanish, Argentina
es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela
es_bo = 0x400A, // Spanish, Bolivia
es_cl = 0x340A, // Spanish, Chile
es_co = 0x240A, // Spanish, Colombia
es_cr = 0x140A, // Spanish, Costa Rica
es_cu = 0x5c0A, // Spanish, Cuba
es_do = 0x1c0A, // Spanish, Dominican Republic
es_ec = 0x300A, // Spanish, Ecuador
es_sv = 0x440A, // Spanish, El Salvador
es_gt = 0x100A, // Spanish, Guatemala
es_hn = 0x480A, // Spanish, Honduras
es_419 = 0x580A, // Spanish, Latin America
es_mx = 0x080A, // Spanish, Mexico
es_ni = 0x4C0A, // Spanish, Nicaragua
es_pa = 0x180A, // Spanish, Panama
es_py = 0x3C0A, // Spanish, Paraguay
es_pe = 0x280A, // Spanish, Peru
es_pr = 0x500A, // Spanish, Puerto Rico
es_es_tradnl = 0x040A, // Spanish, Spain
es_es = 0x0c0A, // Spanish, Spain
es_us = 0x540A, // Spanish, United States
es_uy = 0x380A, // Spanish, Uruguay
sv = 0x001D, // Swedish
sv_fi = 0x081D, // Swedish, Finland
sv_se = 0x041D, // Swedish, Sweden
syr = 0x005A, // Syriac
syr_sy = 0x045A, // Syriac, Syria
tg = 0x0028, // Tajik (Cyrillic)
tg_cyrl = 0x7C28, // Tajik (Cyrillic)
tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan
tzm = 0x005F, // Tamazight (Latin)
tzm_latn = 0x7C5F, // Tamazight (Latin)
tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria
ta = 0x0049, // Tamil
ta_in = 0x0449, // Tamil, India
ta_lk = 0x0849, // Tamil, Sri Lanka
tt = 0x0044, // Tatar
tt_ru = 0x0444, // Tatar, Russia
te = 0x004A, // Telugu
te_in = 0x044A, // Telugu, India
th = 0x001E, // Thai
th_th = 0x041E, // Thai, Thailand
bo = 0x0051, // Tibetan
bo_cn = 0x0451, // Tibetan, People's Republic of China
ti = 0x0073, // Tigrinya
ti_er = 0x0873, // Tigrinya, Eritrea
ti_et = 0x0473, // Tigrinya, Ethiopia
ts = 0x0031, // Tsonga
ts_za = 0x0431, // Tsonga, South Africa
tr = 0x001F, // Turkish
tr_tr = 0x041F, // Turkish, Turkey
tk = 0x0042, // Turkmen
tk_tm = 0x0442, // Turkmen, Turkmenistan
uk = 0x0022, // Ukrainian
uk_ua = 0x0422, // Ukrainian, Ukraine
hsb = 0x002E, // Upper Sorbian
hsb_de = 0x042E, // Upper Sorbian, Germany
ur = 0x0020, // Urdu
ur_in = 0x0820, // Urdu, India
ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan
ug = 0x0080, // Uyghur
ug_cn = 0x0480, // Uyghur, People's Republic of China
uz_cyrl = 0x7843, // Uzbek (Cyrillic)
uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan
uz = 0x0043, // Uzbek (Latin)
uz_latn = 0x7C43, // Uzbek (Latin)
uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan
ca_es_valencia = 0x0803, // Valencian, Spain
ve = 0x0033, // Venda
ve_za = 0x0433, // Venda, South Africa
vi = 0x002A, // Vietnamese
vi_vn = 0x042A, // Vietnamese, Vietnam
cy = 0x0052, // Welsh
cy_gb = 0x0452, // Welsh, United Kingdom
wo = 0x0088, // Wolof
wo_sn = 0x0488, // Wolof, Senegal
xh = 0x0034, // Xhosa
xh_za = 0x0434, // Xhosa, South Africa
ii = 0x0078, // Yi
ii_cn = 0x0478, // Yi, People's Republic of China
yi_001 = 0x043D, // Yiddish, World
yo = 0x006A, // Yoruba
yo_ng = 0x046A, // Yoruba, Nigeria
zu = 0x0035, // Zulu
zu_za = 0x0435, // Zulu, South Africa
/// Special case
x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting"
};

1104
src/resinator/lex.zig Normal file

File diff suppressed because it is too large Load Diff

904
src/resinator/literals.zig Normal file
View File

@ -0,0 +1,904 @@
const std = @import("std");
const code_pages = @import("code_pages.zig");
const CodePage = code_pages.CodePage;
const windows1252 = @import("windows1252.zig");
const ErrorDetails = @import("errors.zig").ErrorDetails;
const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
const Token = @import("lex.zig").Token;
/// Reports whether `str` is acceptable as a number literal in a data value.
/// rc is extremely permissive here: only the first byte is inspected, and it
/// merely has to be an ASCII digit, `-`, or `~`. An empty string is rejected.
pub fn isValidNumberDataLiteral(str: []const u8) bool {
    const first = if (str.len > 0) str[0] else return false;
    return first == '~' or first == '-' or std.ascii.isDigit(first);
}
/// A run of raw source bytes paired with the code page that is used to
/// decode those bytes into codepoints.
pub const SourceBytes = struct {
/// The bytes of the literal exactly as they appear in the source.
slice: []const u8,
/// The code page `slice` is decoded with (via `CodePage.codepointAt`).
code_page: CodePage,
};
/// Distinguishes narrow (`ascii`) strings from `wide` strings. Used both for
/// the type a literal declares through its prefix (`"..."` vs `L"..."`) and
/// for the output type requested from `parseQuotedString`.
pub const StringType = enum { ascii, wide };
/// Iterator that yields the codepoints of a quoted string literal one at a
/// time, applying the escape and whitespace rules described below.
///
/// Valid escapes:
/// "" -> "
/// \a, \A => 0x08 (not 0x07 like in C)
/// \n => 0x0A
/// \r => 0x0D
/// \t, \T => 0x09
/// \\ => \
/// \nnn => byte with numeric value given by nnn interpreted as octal
/// (wraps on overflow, number of digits can be 1-3 for ASCII strings
/// and 1-7 for wide strings)
/// \xhh => byte with numeric value given by hh interpreted as hex
/// (number of digits can be 0-2 for ASCII strings and 0-4 for
/// wide strings)
/// \<\r+> => \
/// \<[\r\n\t ]+> => <nothing>
///
/// Special cases:
/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself
/// <\r> => <nothing>
/// <\n+><\w+?\n?> => <space><\n>
///
/// Special, especially weird case:
/// \"" => "
/// NOTE: This leads to footguns because the preprocessor can start parsing things
/// out-of-sync with the RC compiler, expanding macros within string literals, etc.
/// This parse function handles this case the same as the Windows RC compiler, but
/// \" within a string literal is treated as an error by the lexer, so the relevant
/// branches should never actually be hit during this function.
pub const IterativeStringParser = struct {
/// Contents of the literal with the surrounding quotes (and the L prefix,
/// if any) already removed; see `init`.
source: []const u8,
/// Code page used to decode `source` into codepoints.
code_page: CodePage,
/// The type of the string inferred by the prefix (L"" or "")
/// This is what matters for things like the maximum digits in an
/// escape sequence, whether or not invalid escape sequences are skipped, etc.
declared_string_type: StringType,
/// A codepoint queued up to be returned by a later call; used for the
/// `\n` half of a collapsed <space><newline> pair.
pending_codepoint: ?u21 = null,
/// Number of spaces still owed to the caller from an expanded tab.
num_pending_spaces: u8 = 0,
/// Byte offset into `source` of the next codepoint to decode.
index: usize = 0,
/// Column of the codepoint at `index`; used to compute tab stops.
column: usize = 0,
/// When non-null, warnings/notes are appended to its `diagnostics` list.
diagnostics: ?DiagnosticsContext = null,
/// Set once the tab-conversion warning has been emitted so that it is
/// reported at most once per string.
seen_tab: bool = false,
/// States of the per-call state machine in `nextUnchecked`.
const State = enum {
/// Not inside any special sequence.
normal,
/// Just saw a `"`; the next codepoint must be a second `"`.
quote,
/// Inside a run of newline(s) and whitespace that is being collapsed.
newline,
/// Just saw a `\`.
escaped,
/// Saw `\` followed by one or more `\r`.
escaped_cr,
/// Saw `\` followed by a newline; skipping trailing whitespace.
escaped_newlines,
/// Accumulating the digits of a `\nnn` escape.
escaped_octal,
/// Accumulating the digits of a `\xhh` escape.
escaped_hex,
};
/// Creates a parser for the quoted literal in `bytes.slice`.
/// The string type is taken from the first byte (`L`/`l` means wide), the
/// quotes and prefix are stripped, and the starting column is advanced past
/// them. `bytes.slice` must contain at least the two quote characters.
pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
const declared_string_type: StringType = switch (bytes.slice[0]) {
'L', 'l' => .wide,
else => .ascii,
};
var source = bytes.slice[1 .. bytes.slice.len - 1]; // remove ""
var column = options.start_column + 1; // for the removed "
if (declared_string_type == .wide) {
source = source[1..]; // remove L
column += 1; // for the removed L
}
return .{
.source = source,
.code_page = bytes.code_page,
.declared_string_type = declared_string_type,
.column = column,
.diagnostics = options.diagnostics,
};
}
/// One parsed unit of output.
pub const ParsedCodepoint = struct {
codepoint: u21,
/// True when the value came from a `\nnn` or `\xhh` escape rather than
/// from a decoded source character or a named escape.
from_escaped_integer: bool = false,
};
/// Returns the next parsed codepoint, or null at the end of the string.
/// Same as `nextUnchecked`, but when `diagnostics` is set it additionally
/// appends a warning and a note for a handful of codepoints (when they did
/// not come from an integer escape) that rc would miscompile.
/// Can only fail with an allocation error from appending diagnostics.
pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
const result = try self.nextUnchecked();
if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) {
switch (result.?.codepoint) {
0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => {
// 0xD00 gets the "skip" diagnostic, the others the "byte swap" one.
const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00)
.rc_would_miscompile_codepoint_skip
else
.rc_would_miscompile_codepoint_byte_swap;
try self.diagnostics.?.diagnostics.append(ErrorDetails{
.err = err,
.type = .warning,
.token = self.diagnostics.?.token,
.extra = .{ .number = result.?.codepoint },
});
try self.diagnostics.?.diagnostics.append(ErrorDetails{
.err = err,
.type = .note,
.token = self.diagnostics.?.token,
.print_source_line = false,
.extra = .{ .number = result.?.codepoint },
});
},
else => {},
}
}
return result;
}
/// Returns the next parsed codepoint, or null at the end of the string,
/// without the miscompilation checks done by `next`. The only diagnostic
/// this function emits itself is the once-per-string tab warning.
pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
// Drain output that was queued up by a previous call first.
if (self.num_pending_spaces > 0) {
// Ensure that we don't get into this predicament so we can ensure that
// the order of processing any pending stuff doesn't matter
std.debug.assert(self.pending_codepoint == null);
self.num_pending_spaces -= 1;
return .{ .codepoint = ' ' };
}
if (self.pending_codepoint) |pending_codepoint| {
self.pending_codepoint = null;
return .{ .codepoint = pending_codepoint };
}
if (self.index >= self.source.len) return null;
// The state machine always starts fresh on each call.
var state: State = .normal;
// Accumulated value and digit count of the integer escape being parsed.
var string_escape_n: u16 = 0;
var string_escape_i: u8 = 0;
const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
.ascii => 3,
.wide => 7,
};
const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
.ascii => 2,
.wide => 4,
};
while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : (self.index += codepoint.byte_len) {
const c = codepoint.value;
var backtrack = false;
// This defer runs whenever the loop body is left, including via `return`.
// With `backtrack` set it cancels out one advance of `self.index` (either
// the loop's continue expression or the explicit `+=` done right before a
// `return`), so the current codepoint is seen again. Otherwise the
// codepoint counts as consumed and the column is advanced past it.
defer {
if (backtrack) {
self.index -= codepoint.byte_len;
} else {
if (c == '\t') {
self.column += columnsUntilTabStop(self.column, 8);
} else {
self.column += codepoint.byte_len;
}
}
}
switch (state) {
.normal => switch (c) {
'\\' => state = .escaped,
'"' => state = .quote,
'\r' => {},
'\n' => state = .newline,
'\t' => {
// Only warn about a tab getting converted to spaces once per string
if (self.diagnostics != null and !self.seen_tab) {
try self.diagnostics.?.diagnostics.append(ErrorDetails{
.err = .tab_converted_to_spaces,
.type = .warning,
.token = self.diagnostics.?.token,
});
try self.diagnostics.?.diagnostics.append(ErrorDetails{
.err = .tab_converted_to_spaces,
.type = .note,
.token = self.diagnostics.?.token,
.print_source_line = false,
});
self.seen_tab = true;
}
// One space is returned now, the rest are queued for later calls.
const cols = columnsUntilTabStop(self.column, 8);
self.num_pending_spaces = @intCast(cols - 1);
self.index += codepoint.byte_len;
return .{ .codepoint = ' ' };
},
else => {
self.index += codepoint.byte_len;
return .{ .codepoint = c };
},
},
.quote => switch (c) {
'"' => {
// "" => "
self.index += codepoint.byte_len;
return .{ .codepoint = '"' };
},
else => unreachable, // this is a bug in the lexer
},
.newline => switch (c) {
'\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
else => {
// backtrack so that we handle the current char properly
backtrack = true;
// <space><newline>
self.index += codepoint.byte_len;
self.pending_codepoint = '\n';
return .{ .codepoint = ' ' };
},
},
.escaped => switch (c) {
'\r' => state = .escaped_cr,
'\n' => state = .escaped_newlines,
'0'...'7' => {
string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
string_escape_i = 1;
state = .escaped_octal;
},
'x', 'X' => {
string_escape_n = 0;
string_escape_i = 0;
state = .escaped_hex;
},
else => {
switch (c) {
'a', 'A' => {
self.index += codepoint.byte_len;
return .{ .codepoint = '\x08' };
}, // might be a bug in RC, but matches its behavior
'n' => {
self.index += codepoint.byte_len;
return .{ .codepoint = '\n' };
},
'r' => {
self.index += codepoint.byte_len;
return .{ .codepoint = '\r' };
},
't', 'T' => {
self.index += codepoint.byte_len;
return .{ .codepoint = '\t' };
},
'\\' => {
self.index += codepoint.byte_len;
return .{ .codepoint = '\\' };
},
'"' => {
// \" is a special case that doesn't get the \ included,
backtrack = true;
},
else => switch (self.declared_string_type) {
.wide => {}, // invalid escape sequences are skipped in wide strings
.ascii => {
// backtrack so that we handle the current char properly
backtrack = true;
self.index += codepoint.byte_len;
return .{ .codepoint = '\\' };
},
},
}
// Only reached by the branches above that did not return.
state = .normal;
},
},
.escaped_cr => switch (c) {
'\r' => {},
'\n' => state = .escaped_newlines,
else => {
// backtrack so that we handle the current char properly
backtrack = true;
self.index += codepoint.byte_len;
return .{ .codepoint = '\\' };
},
},
.escaped_newlines => switch (c) {
'\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
else => {
// backtrack so that we handle the current char properly
backtrack = true;
state = .normal;
},
},
.escaped_octal => switch (c) {
'0'...'7' => {
// Wrapping arithmetic: octal escapes wrap on overflow.
string_escape_n *%= 8;
string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
string_escape_i += 1;
if (string_escape_i == max_octal_escape_digits) {
const escaped_value = switch (self.declared_string_type) {
.ascii => @as(u8, @truncate(string_escape_n)),
.wide => string_escape_n,
};
self.index += codepoint.byte_len;
return .{ .codepoint = escaped_value, .from_escaped_integer = true };
}
},
else => {
// backtrack so that we handle the current char properly
backtrack = true;
// write out whatever byte we have parsed so far
const escaped_value = switch (self.declared_string_type) {
.ascii => @as(u8, @truncate(string_escape_n)),
.wide => string_escape_n,
};
self.index += codepoint.byte_len;
return .{ .codepoint = escaped_value, .from_escaped_integer = true };
},
},
.escaped_hex => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
string_escape_n *= 16;
string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
string_escape_i += 1;
if (string_escape_i == max_hex_escape_digits) {
const escaped_value = switch (self.declared_string_type) {
.ascii => @as(u8, @truncate(string_escape_n)),
.wide => string_escape_n,
};
self.index += codepoint.byte_len;
return .{ .codepoint = escaped_value, .from_escaped_integer = true };
}
},
else => {
// backtrack so that we handle the current char properly
backtrack = true;
// write out whatever byte we have parsed so far
// (even with 0 actual digits, \x alone parses to 0)
const escaped_value = switch (self.declared_string_type) {
.ascii => @as(u8, @truncate(string_escape_n)),
.wide => string_escape_n,
};
self.index += codepoint.byte_len;
return .{ .codepoint = escaped_value, .from_escaped_integer = true };
},
},
}
}
// The source ran out while a sequence was still in progress; flush
// whatever the final state implies.
switch (state) {
.normal, .escaped_newlines => {},
.newline => {
// <space><newline>
self.pending_codepoint = '\n';
return .{ .codepoint = ' ' };
},
.escaped, .escaped_cr => return .{ .codepoint = '\\' },
.escaped_octal, .escaped_hex => {
const escaped_value = switch (self.declared_string_type) {
.ascii => @as(u8, @truncate(string_escape_n)),
.wide => string_escape_n,
};
return .{ .codepoint = escaped_value, .from_escaped_integer = true };
},
.quote => unreachable, // this is a bug in the lexer
}
return null;
}
};
/// Options shared by `IterativeStringParser.init` and the `parseQuoted*` functions.
pub const StringParseOptions = struct {
/// Column of the first byte of the literal; the parser adds the width of
/// the opening quote (and L prefix) to this to get the column of the
/// string's contents, which determines how tabs are expanded.
start_column: usize = 0,
/// When non-null, warnings and notes produced while parsing are appended here.
diagnostics: ?DiagnosticsContext = null,
/// Code page used to encode the result when an ascii (u8) string is
/// produced by `parseQuotedString`.
output_code_page: CodePage = .windows1252,
};
/// Parses the quoted string literal in `bytes` (with or without an `L` prefix)
/// into a newly allocated string of the requested `literal_type`:
/// - `.ascii` yields a `[]u8` encoded using `options.output_code_page`
///   (only `.windows1252` and `.utf8` are supported).
/// - `.wide` yields a 0-terminated slice of little-endian UTF-16 code units.
///
/// Escape handling (max digits, skipping of invalid escapes) is governed by
/// the prefix of the literal itself, not by `literal_type`. The value of an
/// integer escape (`\nnn`, `\xhh`) is written as a single element without any
/// code page conversion; if it does not fit in the element type (possible
/// when an `L"..."` literal is parsed as `.ascii`) it is truncated.
///
/// `bytes.slice` must include the surrounding quotes. The caller owns the
/// returned slice and must free it with `allocator`.
pub fn parseQuotedString(
    comptime literal_type: StringType,
    allocator: std.mem.Allocator,
    bytes: SourceBytes,
    options: StringParseOptions,
) !(switch (literal_type) {
    .ascii => []u8,
    .wide => [:0]u16,
}) {
    const T = if (literal_type == .ascii) u8 else u16;
    std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars

    var buf = try std.ArrayList(T).initCapacity(allocator, bytes.slice.len);
    errdefer buf.deinit();

    var iterative_parser = IterativeStringParser.init(bytes, options);

    while (try iterative_parser.next()) |parsed| {
        const c = parsed.codepoint;
        if (parsed.from_escaped_integer) {
            // A wide-declared literal can produce values up to 0xFFFF, which
            // would not fit in a u8 element, so truncate instead of using a
            // checked cast. The value is stored little-endian like every
            // other code unit written below.
            try buf.append(std.mem.nativeToLittle(T, @truncate(c)));
            continue;
        }
        switch (literal_type) {
            .ascii => switch (options.output_code_page) {
                .windows1252 => {
                    if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
                        try buf.append(best_fit);
                    } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
                        try buf.append('?');
                    } else {
                        // Codepoints that would need a UTF-16 surrogate pair
                        // become two question marks.
                        try buf.appendSlice("??");
                    }
                },
                .utf8 => {
                    // Invalid input is written out as U+FFFD REPLACEMENT CHARACTER.
                    const codepoint_to_encode = if (c == code_pages.Codepoint.invalid) '\u{FFFD}' else c;
                    var utf8_buf: [4]u8 = undefined;
                    const utf8_len = std.unicode.utf8Encode(codepoint_to_encode, &utf8_buf) catch unreachable;
                    try buf.appendSlice(utf8_buf[0..utf8_len]);
                },
                else => unreachable, // Unsupported code page
            },
            .wide => {
                if (c == code_pages.Codepoint.invalid) {
                    // Invalid input is written out as U+FFFD REPLACEMENT CHARACTER.
                    try buf.append(std.mem.nativeToLittle(u16, '\u{FFFD}'));
                } else if (c < 0x10000) {
                    const short: u16 = @intCast(c);
                    try buf.append(std.mem.nativeToLittle(u16, short));
                } else {
                    // Encode as a UTF-16 surrogate pair.
                    const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
                    try buf.append(std.mem.nativeToLittle(u16, high));
                    const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
                    try buf.append(std.mem.nativeToLittle(u16, low));
                }
            },
        }
    }

    if (literal_type == .wide) {
        return buf.toOwnedSliceSentinel(0);
    } else {
        return buf.toOwnedSlice();
    }
}
/// Parses a quoted string literal into a newly allocated `[]u8` by calling
/// `parseQuotedString` with `.ascii`. Asserts that `bytes.slice` is at least
/// long enough to hold the two quote characters. Caller owns the result.
pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
std.debug.assert(bytes.slice.len >= 2); // ""
return parseQuotedString(.ascii, allocator, bytes, options);
}
/// Parses an `L"..."` literal into a newly allocated 0-terminated `u16`
/// string by calling `parseQuotedString` with `.wide`. Asserts that
/// `bytes.slice` is at least long enough to hold the prefix and both quotes.
/// Caller owns the result.
pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
std.debug.assert(bytes.slice.len >= 3); // L""
return parseQuotedString(.wide, allocator, bytes, options);
}
/// Parses a quoted string literal, with or without an `L` prefix, into a
/// newly allocated 0-terminated `u16` string by calling `parseQuotedString`
/// with `.wide`. Asserts that `bytes.slice` is at least long enough to hold
/// the two quote characters. Caller owns the result.
pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
std.debug.assert(bytes.slice.len >= 2); // ""
return parseQuotedString(.wide, allocator, bytes, options);
}
/// Parses a quoted string literal, with or without an `L` prefix, into a
/// newly allocated `[]u8` by calling `parseQuotedString` with `.ascii`.
/// Asserts that `bytes.slice` is at least long enough to hold the two quote
/// characters. Caller owns the result.
pub fn parseQuotedStringAsAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
std.debug.assert(bytes.slice.len >= 2); // ""
return parseQuotedString(.ascii, allocator, bytes, options);
}
test "parse quoted ascii string" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"hello"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex with 0 digits
    try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\x"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex max of 2 digits
    try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\XfFf"
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal with invalid octal digit
    try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\19"
        ,
        .code_page = .windows1252,
    }, .{}));
    // escaped quotes
    try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{
        .slice =
        \\" "" "
        ,
        .code_page = .windows1252,
    }, .{}));
    // backslash right before escaped quotes
    try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\"""
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal overflow
    try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\401"
        ,
        .code_page = .windows1252,
    }, .{}));
    // escapes
    try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\a\n\r\t\\"
        ,
        .code_page = .windows1252,
    }, .{}));
    // uppercase escapes
    try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\A\N\R\T\\"
        ,
        .code_page = .windows1252,
    }, .{}));
    // backslash on its own
    try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\"
        ,
        .code_page = .windows1252,
    }, .{}));
    // unrecognized escapes
    try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{
        .slice =
        \\"\b"
        ,
        .code_page = .windows1252,
    }, .{}));
    // escaped carriage returns
    try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 },
        .{},
    ));
    // escaped newlines
    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 },
        .{},
    ));
    // escaped CRLF pairs
    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 },
        .{},
    ));
    // escaped newlines with other whitespace
    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 },
        .{},
    ));
    // literal tab characters get converted to spaces (dependent on source file columns).
    // The opening quote sits at column 0, so the contents start at column 1 and
    // each tab pads up to the next multiple of 8. The expected padding is spelled
    // with `**` so that the exact number of spaces is unambiguous.
    // tab at column 1 => 7 spaces
    try std.testing.expectEqualSlices(u8, " " ** 7, try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\t\"", .code_page = .windows1252 },
        .{},
    ));
    // tab at column 4 => 4 spaces
    try std.testing.expectEqualSlices(u8, "abc" ++ (" " ** 4), try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"abc\t\"", .code_page = .windows1252 },
        .{},
    ));
    // tab at column 8 => a full 8 spaces
    try std.testing.expectEqualSlices(u8, "abcdefg" ++ (" " ** 8), try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 },
        .{},
    ));
    // \<tab> is an unrecognized escape: the backslash is kept and the tab
    // (at column 2) => 6 spaces
    try std.testing.expectEqualSlices(u8, "\\" ++ (" " ** 6), try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\\\t\"", .code_page = .windows1252 },
        .{},
    ));
    // literal CR's get dropped
    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 },
        .{},
    ));
    // contiguous newlines and whitespace get collapsed to <space><newline>
    try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 },
        .{},
    ));
}
test "parse quoted ascii string with utf8 code page" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that don't have a Windows-1252 representation get converted to ?
    try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"кириллица\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that have a best fit mapping get converted accordingly,
    // these are box drawing codepoints
    try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
        .{},
    ));
    // Invalid UTF-8 gets converted to ? depending on well-formedness
    try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{},
    ));
    // Codepoints that would require a UTF-16 surrogate pair get converted to ??
    try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
        .{},
    ));
    // Output code page changes how invalid UTF-8 gets converted, since it
    // now encodes the result as UTF-8 so it can write replacement characters.
    // (U+FFFD is written as an escape so the expectation cannot be mangled
    // by tools that mishandle the raw character.)
    try std.testing.expectEqualSlices(u8, "\u{FFFD}" ** 4, try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{ .output_code_page = .utf8 },
    ));
    try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
        arena,
        .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
        .{ .output_code_page = .utf8 },
    ));
}
test "parse quoted wide string" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 'h', 'e', 'l', 'l', 'o' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"hello"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex with 0 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\x"
        ,
        .code_page = .windows1252,
    }, .{}));
    // hex max of 4 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0xFFFF, 'f' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\XfFfFf"
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal max of 7 digits
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ 0x9493, '3', '3' }, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\111222333"
        ,
        .code_page = .windows1252,
    }, .{}));
    // octal overflow
    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0xFF01}, try parseQuotedWideString(arena, .{
        .slice =
        \\L"\777401"
        ,
        .code_page = .windows1252,
    }, .{}));
    // literal tab characters get converted to spaces (dependent on source file columns).
    // `L"` occupies columns 0-1, so the tab lands at column 9 => 7 spaces.
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg" ++ (" " ** 7)), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
        .{},
    ));
    // Windows-1252 conversion
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
        .{},
    ));
    // Invalid escape sequences are skipped
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
        .{},
    ));
}
test "parse quoted wide string with utf8 code page" {
    var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\"", .code_page = .utf8 },
        .{},
    ));
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
        .{},
    ));
    // Invalid UTF-8 gets converted to U+FFFD (the replacement character)
    // depending on well-formedness. The expectation uses an escape so it
    // cannot be mangled by tools that mishandle the raw character.
    try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\u{FFFD}" ** 4), try parseQuotedWideString(
        arena,
        .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
        .{},
    ));
}
test "parse quoted ascii string as wide string" {
    const expectWide = std.testing.expectEqualSentinel;
    const utf16 = std.unicode.utf8ToUtf16LeStringLiteral;

    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena_state.deinit();
    const gpa = arena_state.allocator();

    // A narrow literal is widened codepoint by codepoint.
    {
        const actual = try parseQuotedStringAsWideString(gpa, .{
            .slice = "\"кириллица\"",
            .code_page = .utf8,
        }, .{});
        try expectWide(u16, 0, utf16("кириллица"), actual);
    }

    // Whether or not invalid escapes are skipped is still determined by the L prefix
    {
        const narrow = try parseQuotedStringAsWideString(gpa, .{
            .slice = "\"\\H\"",
            .code_page = .windows1252,
        }, .{});
        try expectWide(u16, 0, utf16("\\H"), narrow);

        const wide = try parseQuotedStringAsWideString(gpa, .{
            .slice = "L\"\\H\"",
            .code_page = .windows1252,
        }, .{});
        try expectWide(u16, 0, utf16(""), wide);
    }

    // Maximum escape sequence value is also determined by the L prefix
    {
        const narrow = try parseQuotedStringAsWideString(gpa, .{
            .slice = "\"\\x1234\"",
            .code_page = .windows1252,
        }, .{});
        try expectWide(u16, 0, utf16("\x1234"), narrow);

        const wide = try parseQuotedStringAsWideString(gpa, .{
            .slice = "L\"\\x1234\"",
            .code_page = .windows1252,
        }, .{});
        try expectWide(u16, 0, &[_:0]u16{0x1234}, wide);
    }
}
/// Returns how many columns a tab at `column` advances, given tab stops every
/// `tab_columns` columns. A tab that already sits exactly on a tab stop
/// advances a full `tab_columns`, so the result is always in the range
/// 1...tab_columns. For example, with `tab_columns` = 8:
///   column 0 => 8, column 1 => 7, column 7 => 1, column 8 => 8
pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
    const offset_in_stop = column % tab_columns;
    return tab_columns - offset_in_stop;
}
/// A parsed number: a 32-bit value plus a flag recording whether it is
/// marked as long.
pub const Number = struct {
    value: u32,
    is_long: bool = false,

    /// Returns the low 16 bits of `value`; the upper bits are discarded.
    pub fn asWord(self: Number) u16 {
        const low_bits: u16 = @truncate(self.value);
        return low_bits;
    }

    /// Applies the binary operator `operator_char` (one of `+`, `-`, `|`, `&`)
    /// to `lhs` and `rhs`. Addition and subtraction wrap on overflow. The
    /// result is long if either operand is long. Any other operator is a
    /// lexer/parser bug and is `unreachable`.
    pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
        const a = lhs.value;
        const b = rhs.value;
        return .{
            .value = switch (operator_char) {
                '+' => a +% b,
                '-' => a -% b,
                '&' => a & b,
                '|' => a | b,
                else => unreachable, // invalid operator, this would be a lexer/parser bug
            },
            .is_long = lhs.is_long or rhs.is_long,
        };
    }
};
/// Parses a number literal into a `Number`.
///
/// Accepted shape: an optional `-` or `~` prefix, an optional radix prefix
/// (`0x`/`0X` for hex, `0o` for octal; the octal prefix is case-sensitive),
/// then digits. Parsing stops without failing at the first codepoint that is
/// not a digit of the radix. If that codepoint is `L` or `l`, the number is
/// marked as long. The value wraps on overflow; `-` negates it (wrapping) and
/// `~` takes the bitwise complement.
///
/// Assumes that number literals normally rejected by RC's preprocessor
/// are similarly rejected before being parsed.
///
/// Relevant RC preprocessor errors:
/// RC2021: expected exponent value, not '<digit>'
/// example that is rejected: 1e1
/// example that is accepted: 1ea
/// (this function will parse the two examples above the same)
pub fn parseNumberLiteral(bytes: SourceBytes) Number {
    std.debug.assert(bytes.slice.len > 0);

    const Sign = enum { none, minus, complement };
    const sign: Sign = switch (bytes.slice[0]) {
        '-' => .minus,
        '~' => .complement,
        else => .none,
    };
    var digits = if (sign == .none) bytes.slice else bytes.slice[1..];

    // A radix prefix only counts if at least one more byte follows it.
    var radix: u8 = 10;
    if (digits.len > 2 and digits[0] == '0') {
        const prefixed_radix: ?u8 = switch (digits[1]) {
            'o' => 8, // octal radix prefix is case-sensitive
            'x', 'X' => 16,
            else => null,
        };
        if (prefixed_radix) |r| {
            radix = r;
            digits = digits[2..];
        }
    }

    var magnitude: u32 = 0;
    var saw_long_suffix = false;
    var offset: usize = 0;
    while (bytes.code_page.codepointAt(offset, digits)) |codepoint| : (offset += codepoint.byte_len) {
        const c = codepoint.value;
        if (c == 'L' or c == 'l') {
            saw_long_suffix = true;
            break;
        }
        // On invalid digit for the radix, just stop parsing but don't fail
        if (c > 0x7F) break;
        const digit = std.fmt.charToDigit(@intCast(c), radix) catch break;
        magnitude = magnitude *% radix +% digit;
    }

    return .{
        .value = switch (sign) {
            .none => magnitude,
            .minus => 0 -% magnitude,
            .complement => ~magnitude,
        },
        .is_long = saw_long_suffix,
    };
}
// Exercises parseNumberLiteral: decimal/hex/octal radix prefixes, the `L`/`l` long
// suffix, wrapping on overflow, and the `-` / `~` prefixes.
test "parse number literal" {
    try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 }));
    // an invalid digit stops parsing, so the trailing L is never reached
    try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 }));
    // can handle any length of number, wraps on overflow appropriately
    const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 });
    try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
    try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());
    try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 }));
    // the long suffix is case-insensitive for hex literals as well
    try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2al", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 }));
    // uppercase 'O' is not a radix prefix, so parsing stops right after the leading 0
    try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 }));
    try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 }));
    // anything after L is ignored
    try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 }));
}

1880
src/resinator/parse.zig Normal file

File diff suppressed because it is too large Load Diff

407
src/resinator/rc.zig Normal file
View File

@ -0,0 +1,407 @@
const std = @import("std");
const utils = @import("utils.zig");
const res = @import("res.zig");
const SourceBytes = @import("literals.zig").SourceBytes;
// https://learn.microsoft.com/en-us/windows/win32/menurc/about-resource-files
/// The kinds of resource type that a resource definition can have, either named by a
/// keyword (see `map`) or specified as a number (see `fromString`/`fromRT`).
pub const Resource = enum {
accelerators,
bitmap,
cursor,
dialog,
dialogex,
/// As far as I can tell, this is undocumented; the most I could find was this:
/// https://www.betaarchive.com/wiki/index.php/Microsoft_KB_Archive/91697
dlginclude,
/// Undocumented, basically works exactly like RCDATA
dlginit,
font,
html,
icon,
menu,
menuex,
messagetable,
plugplay, // Obsolete
rcdata,
stringtable,
/// Undocumented
toolbar,
user_defined,
versioninfo,
vxd, // Obsolete
// Types that are treated as a user-defined type when encountered, but have
// special meaning without the Visual Studio GUI. We match the Win32 RC compiler
// behavior by acting as if these keyword don't exist when compiling the .rc
// (thereby treating them as user-defined).
//textinclude, // A special resource that is interpreted by Visual C++.
//typelib, // A special resource that is used with the /TLBID and /TLBOUT linker options
// Types that can only be specified by numbers, they don't have keywords
cursor_num,
icon_num,
string_num,
anicursor_num,
aniicon_num,
fontdir_num,
manifest_num,
/// Keyword -> Resource lookup; keys are compared ASCII case-insensitively.
const map = std.ComptimeStringMapWithEql(Resource, .{
.{ "ACCELERATORS", .accelerators },
.{ "BITMAP", .bitmap },
.{ "CURSOR", .cursor },
.{ "DIALOG", .dialog },
.{ "DIALOGEX", .dialogex },
.{ "DLGINCLUDE", .dlginclude },
.{ "DLGINIT", .dlginit },
.{ "FONT", .font },
.{ "HTML", .html },
.{ "ICON", .icon },
.{ "MENU", .menu },
.{ "MENUEX", .menuex },
.{ "MESSAGETABLE", .messagetable },
.{ "PLUGPLAY", .plugplay },
.{ "RCDATA", .rcdata },
.{ "STRINGTABLE", .stringtable },
.{ "TOOLBAR", .toolbar },
.{ "VERSIONINFO", .versioninfo },
.{ "VXD", .vxd },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
/// Determines the Resource for a resource type given as source bytes.
/// If the bytes form an ordinal: ordinals >= 256 are `.user_defined`, anything
/// smaller is converted via `fromRT`. Otherwise the bytes are looked up as a
/// keyword in `map`, with `.user_defined` as the fallback for unknown names.
pub fn fromString(bytes: SourceBytes) Resource {
const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(bytes);
if (maybe_ordinal) |ordinal| {
if (ordinal.ordinal >= 256) return .user_defined;
return fromRT(@enumFromInt(ordinal.ordinal));
}
return map.get(bytes.slice) orelse .user_defined;
}
// TODO: Some comptime validation that RT <-> Resource conversion is synced?
/// Converts a `res.RT` value to the corresponding Resource. Values without a
/// dedicated mapping become `.user_defined`.
pub fn fromRT(rt: res.RT) Resource {
return switch (rt) {
.ACCELERATOR => .accelerators,
.ANICURSOR => .anicursor_num,
.ANIICON => .aniicon_num,
.BITMAP => .bitmap,
.CURSOR => .cursor_num,
.DIALOG => .dialog,
.DLGINCLUDE => .dlginclude,
.DLGINIT => .dlginit,
.FONT => .font,
.FONTDIR => .fontdir_num,
// Note: the GROUP_* types map to the keyword variants, while the plain
// CURSOR/ICON types map to the number-only variants.
.GROUP_CURSOR => .cursor,
.GROUP_ICON => .icon,
.HTML => .html,
.ICON => .icon_num,
.MANIFEST => .manifest_num,
.MENU => .menu,
.MESSAGETABLE => .messagetable,
.PLUGPLAY => .plugplay,
.RCDATA => .rcdata,
.STRING => .string_num,
.TOOLBAR => .toolbar,
.VERSION => .versioninfo,
.VXD => .vxd,
_ => .user_defined,
};
}
/// Returns true only for the resource types listed in the switch below.
/// NOTE(review): presumably these are the types whose body may be given as a
/// raw data block in the .rc file -- confirm against the parser.
pub fn canUseRawData(resource: Resource) bool {
return switch (resource) {
.user_defined,
.html,
.plugplay, // Obsolete
.rcdata,
.vxd, // Obsolete
.manifest_num,
.dlginit,
=> true,
else => false,
};
}
/// Returns a name for the resource type for use in error messages: the tag name
/// for keyword types, "user-defined" for `.user_defined`, and
/// "<RT number> (<name>)" for the number-only types.
pub fn nameForErrorDisplay(resource: Resource) []const u8 {
return switch (resource) {
// zig fmt: off
.accelerators, .bitmap, .cursor, .dialog, .dialogex, .dlginclude, .dlginit, .font,
.html, .icon, .menu, .menuex, .messagetable, .plugplay, .rcdata, .stringtable,
.toolbar, .versioninfo, .vxd => @tagName(resource),
// zig fmt: on
.user_defined => "user-defined",
.cursor_num => std.fmt.comptimePrint("{d} (cursor)", .{@intFromEnum(res.RT.CURSOR)}),
.icon_num => std.fmt.comptimePrint("{d} (icon)", .{@intFromEnum(res.RT.ICON)}),
.string_num => std.fmt.comptimePrint("{d} (string)", .{@intFromEnum(res.RT.STRING)}),
.anicursor_num => std.fmt.comptimePrint("{d} (anicursor)", .{@intFromEnum(res.RT.ANICURSOR)}),
.aniicon_num => std.fmt.comptimePrint("{d} (aniicon)", .{@intFromEnum(res.RT.ANIICON)}),
.fontdir_num => std.fmt.comptimePrint("{d} (fontdir)", .{@intFromEnum(res.RT.FONTDIR)}),
.manifest_num => std.fmt.comptimePrint("{d} (manifest)", .{@intFromEnum(res.RT.MANIFEST)}),
};
}
};
/// Optional statements of a resource definition. The keywords are split across two
/// lookup tables: `map` (CHARACTERISTICS/LANGUAGE/VERSION) and `dialog_map` (the
/// ones listed under `// DIALOG`).
///
/// https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialog-resource#parameters
/// https://learn.microsoft.com/en-us/windows/win32/menurc/dialogex-resource#parameters
pub const OptionalStatements = enum {
characteristics,
language,
version,
// DIALOG
caption,
class,
exstyle,
font,
menu,
style,
/// Keyword lookup for the first three statements; keys are compared ASCII
/// case-insensitively.
pub const map = std.ComptimeStringMapWithEql(OptionalStatements, .{
.{ "CHARACTERISTICS", .characteristics },
.{ "LANGUAGE", .language },
.{ "VERSION", .version },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
/// Keyword lookup for the statements listed under `// DIALOG` above; keys are
/// compared ASCII case-insensitively.
pub const dialog_map = std.ComptimeStringMapWithEql(OptionalStatements, .{
.{ "CAPTION", .caption },
.{ "CLASS", .class },
.{ "EXSTYLE", .exstyle },
.{ "FONT", .font },
.{ "MENU", .menu },
.{ "STYLE", .style },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// Control statement keywords.
pub const Control = enum {
    auto3state,
    autocheckbox,
    autoradiobutton,
    checkbox,
    combobox,
    control,
    ctext,
    defpushbutton,
    edittext,
    hedit,
    iedit,
    groupbox,
    icon,
    listbox,
    ltext,
    pushbox,
    pushbutton,
    radiobutton,
    rtext,
    scrollbar,
    state3,
    userbutton,

    /// Keyword -> Control lookup; keys are compared ASCII case-insensitively.
    pub const map = std.ComptimeStringMapWithEql(Control, .{
        .{ "AUTO3STATE", .auto3state },
        .{ "AUTOCHECKBOX", .autocheckbox },
        .{ "AUTORADIOBUTTON", .autoradiobutton },
        .{ "CHECKBOX", .checkbox },
        .{ "COMBOBOX", .combobox },
        .{ "CONTROL", .control },
        .{ "CTEXT", .ctext },
        .{ "DEFPUSHBUTTON", .defpushbutton },
        .{ "EDITTEXT", .edittext },
        .{ "HEDIT", .hedit },
        .{ "IEDIT", .iedit },
        .{ "GROUPBOX", .groupbox },
        .{ "ICON", .icon },
        .{ "LISTBOX", .listbox },
        .{ "LTEXT", .ltext },
        .{ "PUSHBOX", .pushbox },
        .{ "PUSHBUTTON", .pushbutton },
        .{ "RADIOBUTTON", .radiobutton },
        .{ "RTEXT", .rtext },
        .{ "SCROLLBAR", .scrollbar },
        .{ "STATE3", .state3 },
        .{ "USERBUTTON", .userbutton },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Returns false for the controls that have no text parameter
    /// (combobox, edittext, hedit, iedit, listbox, scrollbar) and true for all others.
    pub fn hasTextParam(control: Control) bool {
        return switch (control) {
            .combobox,
            .edittext,
            .hedit,
            .iedit,
            .listbox,
            .scrollbar,
            => false,
            else => true,
        };
    }
};
/// Lookups from a control class name to its `res.ControlClass` value.
pub const ControlClass = struct {
    /// Name -> `res.ControlClass` lookup for byte strings; keys are compared ASCII
    /// case-insensitively.
    pub const map = std.ComptimeStringMapWithEql(res.ControlClass, .{
        .{ "BUTTON", .button },
        .{ "EDIT", .edit },
        .{ "STATIC", .static },
        .{ "LISTBOX", .listbox },
        .{ "SCROLLBAR", .scrollbar },
        .{ "COMBOBOX", .combobox },
    }, std.comptime_string_map.eqlAsciiIgnoreCase);

    /// Like `map.get` but works on WTF16 strings, for use with parsed
    /// string literals ("BUTTON", or even "\x42UTTON")
    ///
    /// Returns null when `str` is not one of the known class names.
    pub fn fromWideString(str: []const u16) ?res.ControlClass {
        const utf16Literal = std.unicode.utf8ToUtf16LeStringLiteral;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("BUTTON"))) return .button;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("EDIT"))) return .edit;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("STATIC"))) return .static;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("LISTBOX"))) return .listbox;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("SCROLLBAR"))) return .scrollbar;
        if (ascii.eqlIgnoreCaseW(str, utf16Literal("COMBOBOX"))) return .combobox;
        return null;
    }
};
/// ASCII helpers for UTF-16/WTF-16 code unit slices.
const ascii = struct {
    /// Compares ASCII values case-insensitively, non-ASCII values are compared directly
    ///
    /// Returns false when the lengths differ. Either slice may contain arbitrary
    /// (non-ASCII) code units.
    pub fn eqlIgnoreCaseW(a: []const u16, b: []const u16) bool {
        if (a.len != b.len) return false;
        for (a, b) |a_c, b_c| {
            // Only narrow to u8 when *both* code units are ASCII; narrowing a
            // code unit > 255 with @intCast would be illegal behavior.
            if (a_c < 128 and b_c < 128) {
                if (std.ascii.toLower(@intCast(a_c)) != std.ascii.toLower(@intCast(b_c))) return false;
            } else if (a_c != b_c) {
                return false;
            }
        }
        return true;
    }
};
/// Menu item statement keywords, plus helpers for the SEPARATOR keyword and the
/// menu item options.
pub const MenuItem = enum {
menuitem,
popup,
/// Keyword -> MenuItem lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(MenuItem, .{
.{ "MENUITEM", .menuitem },
.{ "POPUP", .popup },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
/// Returns true if `bytes` is "SEPARATOR", compared ASCII case-insensitively.
pub fn isSeparator(bytes: []const u8) bool {
return std.ascii.eqlIgnoreCase(bytes, "SEPARATOR");
}
/// Menu item option keywords.
pub const Option = enum {
checked,
grayed,
help,
inactive,
menubarbreak,
menubreak,
/// Keyword -> Option lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(Option, .{
.{ "CHECKED", .checked },
.{ "GRAYED", .grayed },
.{ "HELP", .help },
.{ "INACTIVE", .inactive },
.{ "MENUBARBREAK", .menubarbreak },
.{ "MENUBREAK", .menubreak },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
};
/// Toolbar button keywords (BUTTON and SEPARATOR).
pub const ToolbarButton = enum {
button,
separator,
/// Keyword -> ToolbarButton lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(ToolbarButton, .{
.{ "BUTTON", .button },
.{ "SEPARATOR", .separator },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// Version info statement keywords (FILEVERSION, PRODUCTVERSION, etc).
pub const VersionInfo = enum {
file_version,
product_version,
file_flags_mask,
file_flags,
file_os,
file_type,
file_subtype,
/// Keyword -> VersionInfo lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(VersionInfo, .{
.{ "FILEVERSION", .file_version },
.{ "PRODUCTVERSION", .product_version },
.{ "FILEFLAGSMASK", .file_flags_mask },
.{ "FILEFLAGS", .file_flags },
.{ "FILEOS", .file_os },
.{ "FILETYPE", .file_type },
.{ "FILESUBTYPE", .file_subtype },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// The BLOCK and VALUE keywords.
pub const VersionBlock = enum {
block,
value,
/// Keyword -> VersionBlock lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(VersionBlock, .{
.{ "BLOCK", .block },
.{ "VALUE", .value },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// Keywords that can be the first token in a statement and (if so) dictate how the rest
/// of the statement is parsed.
pub const TopLevelKeywords = enum {
language,
version,
characteristics,
stringtable,
/// Keyword -> TopLevelKeywords lookup; keys are compared ASCII case-insensitively.
pub const map = std.ComptimeStringMapWithEql(TopLevelKeywords, .{
.{ "LANGUAGE", .language },
.{ "VERSION", .version },
.{ "CHARACTERISTICS", .characteristics },
.{ "STRINGTABLE", .stringtable },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// Resource attribute keywords (PRELOAD, LOADONCALL, FIXED, etc).
pub const CommonResourceAttributes = enum {
preload,
loadoncall,
fixed,
moveable,
discardable,
pure,
impure,
shared,
nonshared,
/// Keyword -> CommonResourceAttributes lookup; keys are compared ASCII
/// case-insensitively.
pub const map = std.ComptimeStringMapWithEql(CommonResourceAttributes, .{
.{ "PRELOAD", .preload },
.{ "LOADONCALL", .loadoncall },
.{ "FIXED", .fixed },
.{ "MOVEABLE", .moveable },
.{ "DISCARDABLE", .discardable },
.{ "PURE", .pure },
.{ "IMPURE", .impure },
.{ "SHARED", .shared },
.{ "NONSHARED", .nonshared },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};
/// Accelerator type and option keywords (VIRTKEY, ASCII, NOINVERT, ALT, SHIFT, CONTROL).
pub const AcceleratorTypeAndOptions = enum {
virtkey,
ascii,
noinvert,
alt,
shift,
control,
/// Keyword -> AcceleratorTypeAndOptions lookup; keys are compared ASCII
/// case-insensitively.
pub const map = std.ComptimeStringMapWithEql(AcceleratorTypeAndOptions, .{
.{ "VIRTKEY", .virtkey },
.{ "ASCII", .ascii },
.{ "NOINVERT", .noinvert },
.{ "ALT", .alt },
.{ "SHIFT", .shift },
.{ "CONTROL", .control },
}, std.comptime_string_map.eqlAsciiIgnoreCase);
};

1108
src/resinator/res.zig Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,684 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
const parseQuotedAsciiString = @import("literals.zig").parseQuotedAsciiString;
const lex = @import("lex.zig");
/// The result of `parseAndRemoveLineCommands`.
pub const ParseLineCommandsResult = struct {
/// The source with the #line commands (and any ignored contents) removed.
result: []u8,
/// Mapping from each line of `result` to its line/file of origin.
mappings: SourceMappings,
};
/// Tracks the most recent #line information while parsing.
const CurrentMapping = struct {
/// Line number within the file named by `filename`. Set by `handleLineCommand`
/// and incremented by `handleLineEnd`.
line_num: usize = 1,
/// Filename from the most recent #line command (or the initial filename).
filename: std.ArrayListUnmanaged(u8) = .{},
/// Set to true by `handleLineCommand` and to false by `handleLineEnd`.
pending: bool = true,
/// True when `filename` ends with .c or .h (case-insensitive), in which case
/// non-preprocessor contents are not written to the output.
ignore_contents: bool = false,
};
/// Options for `parseAndRemoveLineCommands`.
pub const ParseAndRemoveLineCommandsOptions = struct {
/// If non-null, used as the filename for lines that precede any #line command
/// and registered as the root file of the resulting mappings.
initial_filename: ?[]const u8 = null,
};
/// Parses and removes #line commands as well as all source code that is within a file
/// with .c or .h extensions.
///
/// > RC treats files with the .c and .h extensions in a special manner. It
/// > assumes that a file with one of these extensions does not contain
/// > resources. If a file has the .c or .h file name extension, RC ignores all
/// > lines in the file except the preprocessor directives. Therefore, to
/// > include a file that contains resources in another resource script, give
/// > the file to be included an extension other than .c or .h.
/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
///
/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
/// between the lines and their corresponding lines in their original files.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// If `options.initial_filename` is provided, that filename is guaranteed to be
/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
///
/// On success, the caller owns the returned `mappings` and must `deinit` them with
/// the same allocator. On error, the mappings are freed before returning.
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
var parse_result = ParseLineCommandsResult{
.result = undefined,
.mappings = .{},
};
errdefer parse_result.mappings.deinit(allocator);
var current_mapping: CurrentMapping = .{};
defer current_mapping.filename.deinit(allocator);
if (options.initial_filename) |initial_filename| {
try current_mapping.filename.appendSlice(allocator, initial_filename);
parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
}
std.debug.assert(buf.len >= source.len);
var result = UncheckedSliceWriter{ .slice = buf };
// line_start: only whitespace (if anything) has been seen on the current line
// preprocessor: the first non-whitespace character of the line was '#'
// non_preprocessor: the first non-whitespace character of the line was anything else
const State = enum {
line_start,
preprocessor,
non_preprocessor,
};
var state: State = .line_start;
var index: usize = 0;
// Index of the first not-yet-written character of the current line (leading
// whitespace or the '#'), or null if there is nothing pending.
var pending_start: ?usize = null;
// Index of the '#' that started the current preprocessor line.
var preprocessor_start: usize = 0;
// 1-based line number within the output (only advanced when a line is written).
var line_number: usize = 1;
while (index < source.len) : (index += 1) {
const c = source[index];
switch (state) {
.line_start => switch (c) {
'#' => {
preprocessor_start = index;
state = .preprocessor;
if (pending_start == null) {
pending_start = index;
}
},
'\r', '\n' => {
// Empty (or whitespace-only) line; any pending leading whitespace is dropped.
const is_crlf = formsLineEndingPair(source, c, index + 1);
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
if (!current_mapping.ignore_contents) {
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
// Skip the second character of a two-character line ending
if (is_crlf) index += 1;
pending_start = null;
},
' ', '\t', '\x0b', '\x0c' => {
// Leading whitespace is held back until we know what kind of line this is
if (pending_start == null) {
pending_start = index;
}
},
else => {
state = .non_preprocessor;
if (pending_start != null) {
if (!current_mapping.ignore_contents) {
// Flush the held-back leading whitespace together with the current character
result.writeSlice(source[pending_start.? .. index + 1]);
}
pending_start = null;
continue;
}
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.preprocessor => switch (c) {
'\r', '\n' => {
// Now that we have the full line we can decide what to do with it
const preprocessor_str = source[preprocessor_start..index];
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
// #line commands update the current mapping and are not written to the output
try handleLineCommand(allocator, preprocessor_str, &current_mapping);
} else {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
if (!current_mapping.ignore_contents) {
// Write the whole line (including leading whitespace and the line ending)
const line_ending_len: usize = if (is_crlf) 2 else 1;
result.writeSlice(source[pending_start.? .. index + line_ending_len]);
line_number += 1;
}
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
else => {},
},
.non_preprocessor => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
if (!current_mapping.ignore_contents) {
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
}
} else {
// End of input: finish off the last line if it had no trailing line ending
switch (state) {
.line_start => {},
.non_preprocessor => {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
},
.preprocessor => {
// Now that we have the full line we can decide what to do with it
const preprocessor_str = source[preprocessor_start..index];
if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
try handleLineCommand(allocator, preprocessor_str, &current_mapping);
} else {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start.?..index]);
}
}
},
}
}
parse_result.result = result.getWritten();
// Remove whitespace from the end of the result. This avoids issues when the
// preprocessor adds a newline to the end of the file, since then the
// post-preprocessed source could have more lines than the corresponding input source and
// the inserted line can't be mapped to any lines in the original file.
// There's no way that whitespace at the end of a file can affect the parsing
// of the RC script so this is okay to do unconditionally.
// TODO: There might be a better way around this
while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
parse_result.result.len -= 1;
}
// If there have been no line mappings at all, then we're dealing with an empty file.
// In this case, we want to fake a line mapping just so that we return something
// that is useable in the same way that a non-empty mapping would be.
if (parse_result.mappings.mapping.items.len == 0) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
}
return parse_result;
}
/// Returns true if the byte at `next_index` is a line ending character (`\r` or `\n`)
/// that differs from `line_ending`, i.e. the two form a `\r\n` or `\n\r` pair.
/// Returns false if `next_index` is past the end of `source`.
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    if (next_index >= source.len) return false;

    const following = source[next_index];
    const is_line_ending = following == '\r' or following == '\n';
    // \n\n and \r\r are two separate line endings, not a pair
    return is_line_ending and following != line_ending;
}
/// Records that line `post_processed_line_number` of the output corresponds to the
/// current line of the current file, then advances `current_mapping` to the next line
/// and clears its `pending` flag.
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
    const original_line = current_mapping.line_num;
    const span = SourceMappings.SourceSpan{
        .start_line = original_line,
        .end_line = original_line,
        .filename_offset = try mapping.files.put(allocator, current_mapping.filename.items),
    };
    try mapping.set(allocator, post_processed_line_number, span);

    current_mapping.line_num = original_line + 1;
    current_mapping.pending = false;
}
// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
/// Parses a `#line <number> "<filename>"` command and, if it is valid, updates
/// `current_mapping` with the new line number and filename. Invalid commands are
/// ignored (the function returns without modifying `current_mapping`).
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
    // TODO: Are there other whitespace characters that should be included?
    var tokens = std.mem.tokenize(u8, line_command, " \t");

    const directive = tokens.next() orelse return; // #line
    if (!std.mem.eql(u8, directive, "#line")) return;

    const line_num_token = tokens.next() orelse return;
    const new_line_num = std.fmt.parseUnsigned(usize, line_num_token, 10) catch return;

    // Everything after the line number (minus trailing whitespace) must be a
    // double-quoted filename literal.
    const remainder = tokens.rest();
    var end: usize = remainder.len;
    while (end > 0 and std.ascii.isWhitespace(remainder[end - 1])) end -= 1;
    const quoted = remainder[0..end];
    if (quoted.len < 2) return;
    if (quoted[0] != '"' or quoted[quoted.len - 1] != '"') return;

    const parsed = parseFilename(allocator, quoted[1 .. quoted.len - 1]) catch |err| switch (err) {
        error.OutOfMemory => return error.OutOfMemory,
        else => return,
    };
    defer allocator.free(parsed);

    current_mapping.line_num = new_line_num;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, parsed);
    current_mapping.pending = true;

    // The contents of .c and .h files are ignored
    const is_c_file = std.ascii.endsWithIgnoreCase(parsed, ".c");
    current_mapping.ignore_contents = is_c_file or std.ascii.endsWithIgnoreCase(parsed, ".h");
}
/// Same as `parseAndRemoveLineCommands`, but allocates the output buffer.
///
/// On success, the caller owns both `result` (free with `allocator.free`) and
/// `mappings` (free with `mappings.deinit(allocator)`). On error, nothing is leaked.
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // From here on the mappings are owned by us, so they must be released if
    // shrinking the buffer fails.
    errdefer result.mappings.deinit(allocator);
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}
/// C-style string parsing with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escape are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
///   may be unescaped double quotes within the str) and will return error.InvalidString
///   on any problems found.
///
/// The result is a UTF-8 encoded string.
/// The caller owns the returned slice and must free it with `allocator`.
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
    const State = enum {
        string,
        escape,
        escape_hex,
        escape_octal,
        escape_u,
    };

    // The output is never longer than the input (every escape sequence produces at
    // most as many bytes as it occupies), so all appends below can assume capacity.
    var out = try std.ArrayList(u8).initCapacity(allocator, str.len);
    errdefer out.deinit();

    var state: State = .string;
    // Accumulated value of the numeric escape currently being parsed
    var value: u64 = undefined;
    // Number of digits consumed so far for the current numeric escape
    var digits_seen: usize = undefined;
    // Number of hex digits required by the current \u (4) or \U (8) escape
    var digits_wanted: u8 = undefined;

    var i: usize = 0;
    while (i < str.len) : (i += 1) {
        const c = str[i];
        switch (state) {
            .string => switch (c) {
                '\\' => state = .escape,
                '"' => return error.InvalidString,
                else => out.appendAssumeCapacity(c),
            },
            .escape => {
                // Single-character escapes map directly to one output byte
                const simple: ?u8 = switch (c) {
                    '\'', '"', '\\', '?' => c,
                    'a' => '\x07',
                    'b' => '\x08',
                    'e' => '\x1b', // non-standard
                    'f' => '\x0c',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'v' => '\x0b',
                    else => null,
                };
                if (simple) |byte| {
                    out.appendAssumeCapacity(byte);
                    state = .string;
                    continue;
                }
                switch (c) {
                    'x' => {
                        value = 0;
                        digits_seen = 0;
                        state = .escape_hex;
                    },
                    '0'...'7' => {
                        value = c - '0';
                        digits_seen = 1;
                        state = .escape_octal;
                    },
                    'u', 'U' => {
                        value = 0;
                        digits_seen = 0;
                        digits_wanted = if (c == 'u') 4 else 8;
                        state = .escape_u;
                    },
                    else => return error.InvalidString,
                }
            },
            .escape_hex => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    // Values that don't fit in a u8 are rejected
                    const shifted = std.math.mul(u8, @as(u8, @intCast(value)), 16) catch return error.InvalidString;
                    value = std.math.add(u8, shifted, digit) catch return error.InvalidString;
                    digits_seen += 1;
                },
                else => {
                    if (digits_seen == 0) return error.InvalidString;
                    out.appendAssumeCapacity(@intCast(value));
                    state = .string;
                    i -= 1; // reconsume
                },
            },
            .escape_octal => switch (c) {
                '0'...'7' => {
                    const digit = std.fmt.charToDigit(c, 8) catch unreachable;
                    // Values that don't fit in a u8 are rejected
                    const shifted = std.math.mul(u8, @as(u8, @intCast(value)), 8) catch return error.InvalidString;
                    value = std.math.add(u8, shifted, digit) catch return error.InvalidString;
                    digits_seen += 1;
                    // Octal escapes are at most 3 digits long
                    if (digits_seen == 3) {
                        out.appendAssumeCapacity(@intCast(value));
                        state = .string;
                    }
                },
                else => {
                    if (digits_seen == 0) return error.InvalidString;
                    out.appendAssumeCapacity(@intCast(value));
                    state = .string;
                    i -= 1; // reconsume
                },
            },
            .escape_u => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    // Values that don't fit in a u21 are rejected
                    const shifted = std.math.mul(u21, @as(u21, @intCast(value)), 16) catch return error.InvalidString;
                    value = std.math.add(u21, shifted, digit) catch return error.InvalidString;
                    digits_seen += 1;
                    if (digits_seen == digits_wanted) {
                        var utf8_buf: [4]u8 = undefined;
                        const utf8_len = std.unicode.utf8Encode(@intCast(value), &utf8_buf) catch return error.InvalidString;
                        out.appendSliceAssumeCapacity(utf8_buf[0..utf8_len]);
                        state = .string;
                    }
                },
                // Requires digits_wanted valid hex digits
                else => return error.InvalidString,
            },
        }
    }

    // Handle an escape sequence that was cut off or terminated by the end of the input
    switch (state) {
        .string => {},
        .escape, .escape_u => return error.InvalidString,
        .escape_hex => {
            if (digits_seen == 0) return error.InvalidString;
            out.appendAssumeCapacity(@intCast(value));
        },
        .escape_octal => out.appendAssumeCapacity(@intCast(value)),
    }

    return out.toOwnedSlice();
}
/// Test helper: parses `input` with `parseFilename` and expects the result to be
/// byte-for-byte equal to `expected`.
fn testParseFilename(expected: []const u8, input: []const u8) !void {
    const allocator = std.testing.allocator;
    const actual = try parseFilename(allocator, input);
    defer allocator.free(actual);
    try std.testing.expectEqualSlices(u8, expected, actual);
}
test parseFilename {
// simple escapes and a hex escape
try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
// hex escape terminated by a non-hex character, then a 3 digit octal escape
try testParseFilename("\xABz\x53", "\\xABz\\123");
// \u takes 4 hex digits, \U takes 8
try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");
// unescaped double quote
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\""));
// escape sequences cut off by the end of the string
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x"));
// hex escape without any valid hex digits
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ"));
// hex and octal escapes that overflow a u8
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777"));
}
/// Maps each (1-indexed) line number of the post-processed source to a span of
/// lines in one of the original files.
pub const SourceMappings = struct {
    /// line number -> span where the index is (line number - 1)
    mapping: std.ArrayListUnmanaged(SourceSpan) = .{},
    /// Table of all filenames referenced by the spans in `mapping`.
    files: StringTable = .{},
    /// The default assumes that the first filename added is the root file.
    /// The value should be set to the correct offset if that assumption does not hold.
    root_filename_offset: u32 = 0,

    pub const SourceSpan = struct {
        start_line: usize,
        end_line: usize,
        /// Offset of the filename within `SourceMappings.files`
        filename_offset: u32,
    };

    /// Frees all memory owned by the mappings.
    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
        self.files.deinit(allocator);
        self.mapping.deinit(allocator);
    }

    /// Sets the span for `line_num`, expanding the mapping if necessary.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void {
        const ptr = try self.expandAndGet(allocator, line_num);
        ptr.* = span;
    }

    /// Returns true if the mapping is long enough to contain `line_num`.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn has(self: *SourceMappings, line_num: usize) bool {
        return self.mapping.items.len >= line_num;
    }

    /// Note: `line_num` is 1-indexed
    pub fn get(self: SourceMappings, line_num: usize) SourceSpan {
        return self.mapping.items[line_num - 1];
    }

    /// Note: `line_num` is 1-indexed
    pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan {
        return &self.mapping.items[line_num - 1];
    }

    /// Expands the number of lines in the mapping to include the requested
    /// line number (if necessary) and returns a pointer to the value at that
    /// line number.
    ///
    /// Any lines added by the expansion (including the requested one) are left
    /// undefined until they are written to.
    ///
    /// Note: `line_num` is 1-indexed
    pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan {
        // Only ever grow: resizing unconditionally would truncate the mapping
        // (dropping later lines) when an already-mapped line is requested.
        if (line_num > self.mapping.items.len) {
            try self.mapping.resize(allocator, line_num);
        }
        return &self.mapping.items[line_num - 1];
    }

    /// Merges the `num_following_lines_to_collapse` lines after `line_num` into
    /// `line_num`: its span is extended to end where the last collapsed line ends,
    /// and the collapsed lines are removed from the mapping (shifting all later
    /// lines down).
    ///
    /// Note: `line_num` is 1-indexed
    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void {
        std.debug.assert(num_following_lines_to_collapse > 0);

        const span_to_collapse_into = self.getPtr(line_num);
        const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse);
        span_to_collapse_into.end_line = last_collapsed_span.end_line;

        // `line_num` doubles as the 0-based index of the first line to overwrite
        const after_collapsed_start = line_num + num_following_lines_to_collapse;
        const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse;
        std.mem.copy(SourceSpan, self.mapping.items[line_num..new_num_lines], self.mapping.items[after_collapsed_start..]);

        self.mapping.items.len = new_num_lines;
    }

    /// Returns true if the line is from the main/root file (i.e. not a file that has been
    /// `#include`d).
    pub fn isRootFile(self: *SourceMappings, line_num: usize) bool {
        const line_mapping = self.get(line_num);
        return line_mapping.filename_offset == self.root_filename_offset;
    }
};
test "SourceMappings collapse" {
const allocator = std.testing.allocator;
var mappings = SourceMappings{};
defer mappings.deinit(allocator);
const filename_offset = try mappings.files.put(allocator, "test.rc");
try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = filename_offset });
try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 3, .filename_offset = filename_offset });
try mappings.set(allocator, 3, .{ .start_line = 4, .end_line = 4, .filename_offset = filename_offset });
try mappings.set(allocator, 4, .{ .start_line = 5, .end_line = 5, .filename_offset = filename_offset });
// Collapse lines 2 and 3 into line 1
mappings.collapse(1, 2);
// Two lines remain: line 1 now ends where the old line 3 ended, and the old
// line 4 has been shifted down to become line 2.
try std.testing.expectEqual(@as(usize, 2), mappings.mapping.items.len);
try std.testing.expectEqual(@as(usize, 4), mappings.mapping.items[0].end_line);
try std.testing.expectEqual(@as(usize, 5), mappings.mapping.items[1].end_line);
}
/// Same thing as StringTable in Zig's src/Wasm.zig
///
/// A table of deduplicated strings. The strings are stored NUL-terminated
/// back-to-back in `data`, and are identified by their byte offset within `data`.
pub const StringTable = struct {
    data: std.ArrayListUnmanaged(u8) = .{},
    /// Set of the offsets of all strings in `data`, hashed by string contents.
    map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{},

    /// Frees all memory owned by the table.
    pub fn deinit(self: *StringTable, allocator: Allocator) void {
        self.data.deinit(allocator);
        self.map.deinit(allocator);
    }

    /// Adds `value` to the table if it is not already present, and returns the
    /// offset of the string within the table. `value` is copied.
    ///
    /// If an allocation fails, the table is left unmodified.
    pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
        const result = try self.map.getOrPutContextAdapted(
            allocator,
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
            .{ .bytes = &self.data },
        );
        if (result.found_existing) {
            return result.key_ptr.*;
        }
        // The new map entry's key is still undefined at this point, so the entry
        // must not be left behind if storing the string fails.
        errdefer self.map.removeByPtr(result.key_ptr);

        try self.data.ensureUnusedCapacity(allocator, value.len + 1);
        const offset: u32 = @intCast(self.data.items.len);

        self.data.appendSliceAssumeCapacity(value);
        self.data.appendAssumeCapacity(0);

        result.key_ptr.* = offset;

        return offset;
    }

    /// Returns the string that starts at `offset` (without its NUL terminator).
    /// Asserts that `offset` is within the table's data.
    pub fn get(self: StringTable, offset: u32) []const u8 {
        std.debug.assert(offset < self.data.items.len);
        return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
    }

    /// Returns the offset of `value` within the table, or null if the table does
    /// not contain `value`.
    pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
        return self.map.getKeyAdapted(
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
        );
    }
};
/// Expected mapping for one line of processed output, as compared against the
/// actual mappings by `expectEqualMappings`.
const ExpectedSourceSpan = struct {
/// Expected `start_line` of the span the output line maps to.
start_line: usize,
/// Expected `end_line` of the span the output line maps to.
end_line: usize,
/// Expected name of the file the span is attributed to.
filename: []const u8,
};
/// Test helper: runs `parseAndRemoveLineCommandsAlloc` on `source` with
/// `options`, then checks that the resulting text equals `expected` and that
/// the resulting mappings equal `expected_spans` (entry N describes output
/// line N + 1). When the mappings differ, both the expected and the actual
/// mappings are printed via std.debug.print before the error is returned.
fn testParseAndRemoveLineCommands(
    expected: []const u8,
    comptime expected_spans: []const ExpectedSourceSpan,
    source: []const u8,
    options: ParseAndRemoveLineCommandsOptions,
) !void {
    const gpa = std.testing.allocator;

    var results = try parseAndRemoveLineCommandsAlloc(gpa, source, options);
    defer gpa.free(results.result);
    defer results.mappings.deinit(gpa);

    try std.testing.expectEqualStrings(expected, results.result);

    expectEqualMappings(expected_spans, results.mappings) catch |err| {
        // Dump both sides, numbering lines from 1, to make the mismatch readable.
        std.debug.print("\nexpected mappings:\n", .{});
        for (expected_spans, 1..) |want, line_num| {
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, want.filename, want.start_line, want.end_line });
        }

        std.debug.print("\nactual mappings:\n", .{});
        for (results.mappings.mapping.items, 1..) |got, line_num| {
            const got_filename = results.mappings.files.get(got.filename_offset);
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, got_filename, got.start_line, got.end_line });
        }

        std.debug.print("\n", .{});
        return err;
    };
}
/// Test helper: fails unless `mappings` holds exactly `expected_spans.len`
/// entries and, for every 1-based line number, the mapped span has the
/// expected start line, end line and filename.
fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
    try std.testing.expectEqual(expected_spans.len, mappings.mapping.items.len);
    // Mappings are looked up by 1-based line number.
    for (expected_spans, 1..) |want, line_num| {
        const got = mappings.get(line_num);
        const got_filename = mappings.files.get(got.filename_offset);
        try std.testing.expectEqual(want.start_line, got.start_line);
        try std.testing.expectEqual(want.end_line, got.end_line);
        try std.testing.expectEqualStrings(want.filename, got_filename);
    }
}
// A source consisting solely of one `#line` command: the expected output is
// empty and the single expected mapping attributes line 1 to "blah.rc".
test "basic" {
try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
}, "#line 1 \"blah.rc\"", .{});
}
// The `#line` command is expected to be stripped while the `#pragma` line that
// follows it is expected to be kept and mapped to line 1 of "blah.rc".
test "only removes line commands" {
try testParseAndRemoveLineCommands(
\\#pragma code_page(65001)
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
},
\\#line 1 "blah.rc"
\\#pragma code_page(65001)
, .{});
}
// A `#line` command with spaces/tabs between its parts and a trailing CRLF is
// expected to produce the same result as the plain form: empty output and one
// mapping for "blah.rc" line 1.
test "whitespace and line endings" {
try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
}, "#line \t 1 \t \"blah.rc\"\r\n", .{});
}
// Input with many `#line` commands switching between several files. Only the
// two lines following `#line 1 "./included.rc"` are expected in the output,
// both attributed to "./included.rc".
// NOTE(review): the lines attributed to "./header.h" are absent from the
// expected output - presumably content from non-.rc files is dropped by the
// parser; confirm against parseAndRemoveLineCommands.
test "example" {
try testParseAndRemoveLineCommands(
\\
\\included RCDATA {"hello"}
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
},
\\#line 1 "rcdata.rc"
\\#line 1 "<built-in>"
\\#line 1 "<built-in>"
\\#line 355 "<built-in>"
\\#line 1 "<command line>"
\\#line 1 "<built-in>"
\\#line 1 "rcdata.rc"
\\#line 1 "./header.h"
\\
\\
\\2 RCDATA {"blah"}
\\
\\
\\#line 1 "./included.rc"
\\
\\included RCDATA {"hello"}
\\#line 7 "./header.h"
\\#line 1 "rcdata.rc"
, .{});
}
// The input mixes "\r\n", "\r" and "\n\r" after its `#line` commands. The
// expected output keeps the three content lines (with their "\r\n"
// separators, but without the final trailing "\r\n") mapped to lines 1-3 of
// "crlf.rc".
test "CRLF and other line endings" {
try testParseAndRemoveLineCommands(
"hello\r\n#pragma code_page(65001)\r\nworld",
&[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
},
"#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n",
.{},
);
}
// Without any `#line` command in the input, the output is expected to equal
// the input and every line is expected to be attributed to the filename given
// through the `initial_filename` option.
test "no line commands" {
try testParseAndRemoveLineCommands(
\\1 RCDATA {"blah"}
\\2 RCDATA {"blah"}
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
},
\\1 RCDATA {"blah"}
\\2 RCDATA {"blah"}
, .{ .initial_filename = "blah.rc" });
}
// Passes the same mutable buffer as both the source and the destination
// argument of `parseAndRemoveLineCommands`, and expects an empty result for a
// source that contains only a `#line` command.
test "in place" {
var mut_source = "#line 1 \"blah.rc\"".*;
var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{});
// Only the mappings are released here; the result text is not freed by this test.
defer result.mappings.deinit(std.testing.allocator);
try std.testing.expectEqualStrings("", result.result);
}

83
src/resinator/utils.zig Normal file
View File

@ -0,0 +1,83 @@
const std = @import("std");
const builtin = @import("builtin");
/// Minimal cursor-based writer over a caller-provided byte slice.
/// Like std.io.FixedBufferStream, but it performs no bounds checking of its
/// own and its write functions cannot return an error: the caller must
/// guarantee that `slice` is large enough for everything that gets written.
pub const UncheckedSliceWriter = struct {
    const Self = @This();

    /// Index into `slice` at which the next byte will be stored.
    pos: usize = 0,
    /// Destination buffer.
    slice: []u8,

    /// Stores `char` at the cursor and advances the cursor by one.
    pub fn write(self: *Self, char: u8) void {
        const at = self.pos;
        self.slice[at] = char;
        self.pos = at + 1;
    }

    /// Stores every byte of `slice`, in order, through `write`.
    pub fn writeSlice(self: *Self, slice: []const u8) void {
        // Deliberately copied byte by byte, front to back.
        // NOTE(review): source and destination may overlap when a caller
        // rewrites a buffer in place - confirm before switching to a bulk copy.
        var i: usize = 0;
        while (i < slice.len) : (i += 1) {
            self.write(slice[i]);
        }
    }

    /// Returns the part of the destination buffer that has been written so far.
    pub fn getWritten(self: Self) []u8 {
        const end = self.pos;
        return self.slice[0..end];
    }
};
/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if
/// a directory is attempted to be opened.
/// On success the caller owns the returned file and must close it.
/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed.
pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File {
    const opened = try cwd.openFile(path, flags);
    // Do not leak the handle if it is rejected below.
    errdefer opened.close();

    // https://github.com/ziglang/zig/issues/5732
    // The extra check is only performed on non-Windows targets.
    // NOTE(review): presumably openFile already reports IsDir on Windows - confirm.
    const needs_kind_check = builtin.os.tag != .windows;
    if (needs_kind_check) {
        const info = try opened.stat();
        switch (info.kind) {
            .directory => return error.IsDir,
            else => {},
        }
    }

    return opened;
}
/// Emulates the Windows implementation of `iswdigit`, but only returns true
/// for the non-ASCII digits that `iswdigit` on Windows would return true for.
/// ASCII '0'...'9' therefore yield false.
pub fn isNonAsciiDigit(c: u21) bool {
    // Superscript two, three and one are the only accepted code points that
    // are not part of a contiguous run of ten digits.
    switch (c) {
        '\u{B2}', '\u{B3}', '\u{B9}' => return true,
        else => {},
    }

    // First code point (the "zero") of every accepted run; each run spans
    // exactly ten consecutive code points.
    const digit_zeros = [_]u21{
        0x660,  0x6F0,  0x7C0,  0x966,  0x9E6,  0xA66,
        0xAE6,  0xB66,  0xBE6,  0xC66,  0xCE6,  0xD66,
        0xE50,  0xED0,  0xF20,  0x1040, 0x1090, 0x17E0,
        0x1810, 0x1946, 0x19D0, 0x1B50, 0x1BB0, 0x1C40,
        0x1C50, 0xA620, 0xA8D0, 0xA900, 0xAA50, 0xFF10,
    };
    for (digit_zeros) |zero| {
        if (c >= zero and c <= zero + 9) return true;
    }
    return false;
}

View File

@ -0,0 +1,588 @@
const std = @import("std");
/// Reads Windows-1252 bytes from `reader` until the end of the stream and
/// writes each of them to `writer` re-encoded as UTF-8.
/// Returns the number of bytes written to the writer. Any reader error other
/// than error.EndOfStream, and any writer error, is returned to the caller.
pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize {
    var total: usize = 0;
    // Scratch space for the UTF-8 encoding of one non-ASCII codepoint.
    var encoded: [3]u8 = undefined;
    while (reader.readByte()) |byte| {
        const cp = toCodepoint(byte);
        if (cp > 0x7F) {
            const n = std.unicode.utf8Encode(cp, &encoded) catch unreachable;
            try writer.writeAll(encoded[0..n]);
            total += n;
        } else {
            // ASCII is passed through unchanged.
            try writer.writeByte(byte);
            total += 1;
        }
    } else |err| switch (err) {
        error.EndOfStream => return total,
        else => |e| return e,
    }
}
/// Converts the Windows-1252 encoded `win1252_str` into a newly allocated,
/// 0-terminated slice of UTF-16 code units.
/// Every input byte becomes exactly one code unit (see `toCodepoint`), so the
/// returned slice has `win1252_str.len` code units, not counting the sentinel.
/// Caller owns the returned memory and must free it with `allocator`.
pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 {
    // Guaranteed to need exactly the same number of code units as Windows-1252 bytes
    const utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0);
    // Nothing below can fail, so no errdefer is needed for the allocation.
    for (utf16_slice, win1252_str) |*code_unit, c| {
        code_unit.* = toCodepoint(c);
    }
    return utf16_slice;
}
/// Maps a single Windows-1252 byte to its Unicode codepoint.
/// Bytes outside of 0x80...0x9F, as well as 0x81, 0x8D, 0x8F, 0x90 and 0x9D,
/// map to the codepoint with the same numeric value.
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
pub fn toCodepoint(c: u8) u16 {
    // Codepoints for the bytes 0x80...0x9F, indexed by `byte - 0x80`.
    const high_controls = [32]u16{
        0x20ac, // 0x80 Euro Sign
        0x0081, // 0x81 (maps to itself)
        0x201a, // 0x82 Single Low-9 Quotation Mark
        0x0192, // 0x83 Latin Small Letter F With Hook
        0x201e, // 0x84 Double Low-9 Quotation Mark
        0x2026, // 0x85 Horizontal Ellipsis
        0x2020, // 0x86 Dagger
        0x2021, // 0x87 Double Dagger
        0x02c6, // 0x88 Modifier Letter Circumflex Accent
        0x2030, // 0x89 Per Mille Sign
        0x0160, // 0x8a Latin Capital Letter S With Caron
        0x2039, // 0x8b Single Left-Pointing Angle Quotation Mark
        0x0152, // 0x8c Latin Capital Ligature Oe
        0x008d, // 0x8d (maps to itself)
        0x017d, // 0x8e Latin Capital Letter Z With Caron
        0x008f, // 0x8f (maps to itself)
        0x0090, // 0x90 (maps to itself)
        0x2018, // 0x91 Left Single Quotation Mark
        0x2019, // 0x92 Right Single Quotation Mark
        0x201c, // 0x93 Left Double Quotation Mark
        0x201d, // 0x94 Right Double Quotation Mark
        0x2022, // 0x95 Bullet
        0x2013, // 0x96 En Dash
        0x2014, // 0x97 Em Dash
        0x02dc, // 0x98 Small Tilde
        0x2122, // 0x99 Trade Mark Sign
        0x0161, // 0x9a Latin Small Letter S With Caron
        0x203a, // 0x9b Single Right-Pointing Angle Quotation Mark
        0x0153, // 0x9c Latin Small Ligature Oe
        0x009d, // 0x9d (maps to itself)
        0x017e, // 0x9e Latin Small Letter Z With Caron
        0x0178, // 0x9f Latin Capital Letter Y With Diaeresis
    };
    return switch (c) {
        0x80...0x9f => high_controls[c - 0x80],
        else => c,
    };
}
/// Returns the Windows-1252 byte that `codepoint` is mapped to, or null when
/// the table below has no entry for it.
/// Codepoints 0x00...0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D and 0xA0...0xFF map to
/// the byte with the same numeric value; all other entries are explicit.
/// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
/// Plus some mappings found empirically by iterating all codepoints:
/// 0x2007 => 0xA0, // Figure Space
/// 0x2008 => ' ', // Punctuation Space
/// 0x2009 => ' ', // Thin Space
/// 0x200A => ' ', // Hair Space
/// 0x2012 => '-', // Figure Dash
/// 0x2015 => '-', // Horizontal Bar
/// 0x201B => '\'', // Single High-reversed-9 Quotation Mark
/// 0x201F => '"', // Double High-reversed-9 Quotation Mark
/// 0x202F => 0xA0, // Narrow No-Break Space
/// 0x2033 => '"', // Double Prime
/// 0x2036 => '"', // Reversed Double Prime
pub fn bestFitFromCodepoint(codepoint: u21) ?u8 {
return switch (codepoint) {
// Identity mappings: the codepoint value is used as the byte value.
0x00...0x7F,
0x81,
0x8D,
0x8F,
0x90,
0x9D,
0xA0...0xFF,
=> @intCast(codepoint),
0x0100 => 0x41, // Latin Capital Letter A With Macron
0x0101 => 0x61, // Latin Small Letter A With Macron
0x0102 => 0x41, // Latin Capital Letter A With Breve
0x0103 => 0x61, // Latin Small Letter A With Breve
0x0104 => 0x41, // Latin Capital Letter A With Ogonek
0x0105 => 0x61, // Latin Small Letter A With Ogonek
0x0106 => 0x43, // Latin Capital Letter C With Acute
0x0107 => 0x63, // Latin Small Letter C With Acute
0x0108 => 0x43, // Latin Capital Letter C With Circumflex
0x0109 => 0x63, // Latin Small Letter C With Circumflex
0x010a => 0x43, // Latin Capital Letter C With Dot Above
0x010b => 0x63, // Latin Small Letter C With Dot Above
0x010c => 0x43, // Latin Capital Letter C With Caron
0x010d => 0x63, // Latin Small Letter C With Caron
0x010e => 0x44, // Latin Capital Letter D With Caron
0x010f => 0x64, // Latin Small Letter D With Caron
0x0110 => 0xd0, // Latin Capital Letter D With Stroke
0x0111 => 0x64, // Latin Small Letter D With Stroke
0x0112 => 0x45, // Latin Capital Letter E With Macron
0x0113 => 0x65, // Latin Small Letter E With Macron
0x0114 => 0x45, // Latin Capital Letter E With Breve
0x0115 => 0x65, // Latin Small Letter E With Breve
0x0116 => 0x45, // Latin Capital Letter E With Dot Above
0x0117 => 0x65, // Latin Small Letter E With Dot Above
0x0118 => 0x45, // Latin Capital Letter E With Ogonek
0x0119 => 0x65, // Latin Small Letter E With Ogonek
0x011a => 0x45, // Latin Capital Letter E With Caron
0x011b => 0x65, // Latin Small Letter E With Caron
0x011c => 0x47, // Latin Capital Letter G With Circumflex
0x011d => 0x67, // Latin Small Letter G With Circumflex
0x011e => 0x47, // Latin Capital Letter G With Breve
0x011f => 0x67, // Latin Small Letter G With Breve
0x0120 => 0x47, // Latin Capital Letter G With Dot Above
0x0121 => 0x67, // Latin Small Letter G With Dot Above
0x0122 => 0x47, // Latin Capital Letter G With Cedilla
0x0123 => 0x67, // Latin Small Letter G With Cedilla
0x0124 => 0x48, // Latin Capital Letter H With Circumflex
0x0125 => 0x68, // Latin Small Letter H With Circumflex
0x0126 => 0x48, // Latin Capital Letter H With Stroke
0x0127 => 0x68, // Latin Small Letter H With Stroke
0x0128 => 0x49, // Latin Capital Letter I With Tilde
0x0129 => 0x69, // Latin Small Letter I With Tilde
0x012a => 0x49, // Latin Capital Letter I With Macron
0x012b => 0x69, // Latin Small Letter I With Macron
0x012c => 0x49, // Latin Capital Letter I With Breve
0x012d => 0x69, // Latin Small Letter I With Breve
0x012e => 0x49, // Latin Capital Letter I With Ogonek
0x012f => 0x69, // Latin Small Letter I With Ogonek
0x0130 => 0x49, // Latin Capital Letter I With Dot Above
0x0131 => 0x69, // Latin Small Letter Dotless I
0x0134 => 0x4a, // Latin Capital Letter J With Circumflex
0x0135 => 0x6a, // Latin Small Letter J With Circumflex
0x0136 => 0x4b, // Latin Capital Letter K With Cedilla
0x0137 => 0x6b, // Latin Small Letter K With Cedilla
0x0139 => 0x4c, // Latin Capital Letter L With Acute
0x013a => 0x6c, // Latin Small Letter L With Acute
0x013b => 0x4c, // Latin Capital Letter L With Cedilla
0x013c => 0x6c, // Latin Small Letter L With Cedilla
0x013d => 0x4c, // Latin Capital Letter L With Caron
0x013e => 0x6c, // Latin Small Letter L With Caron
0x0141 => 0x4c, // Latin Capital Letter L With Stroke
0x0142 => 0x6c, // Latin Small Letter L With Stroke
0x0143 => 0x4e, // Latin Capital Letter N With Acute
0x0144 => 0x6e, // Latin Small Letter N With Acute
0x0145 => 0x4e, // Latin Capital Letter N With Cedilla
0x0146 => 0x6e, // Latin Small Letter N With Cedilla
0x0147 => 0x4e, // Latin Capital Letter N With Caron
0x0148 => 0x6e, // Latin Small Letter N With Caron
0x014c => 0x4f, // Latin Capital Letter O With Macron
0x014d => 0x6f, // Latin Small Letter O With Macron
0x014e => 0x4f, // Latin Capital Letter O With Breve
0x014f => 0x6f, // Latin Small Letter O With Breve
0x0150 => 0x4f, // Latin Capital Letter O With Double Acute
0x0151 => 0x6f, // Latin Small Letter O With Double Acute
0x0152 => 0x8c, // Latin Capital Ligature Oe
0x0153 => 0x9c, // Latin Small Ligature Oe
0x0154 => 0x52, // Latin Capital Letter R With Acute
0x0155 => 0x72, // Latin Small Letter R With Acute
0x0156 => 0x52, // Latin Capital Letter R With Cedilla
0x0157 => 0x72, // Latin Small Letter R With Cedilla
0x0158 => 0x52, // Latin Capital Letter R With Caron
0x0159 => 0x72, // Latin Small Letter R With Caron
0x015a => 0x53, // Latin Capital Letter S With Acute
0x015b => 0x73, // Latin Small Letter S With Acute
0x015c => 0x53, // Latin Capital Letter S With Circumflex
0x015d => 0x73, // Latin Small Letter S With Circumflex
0x015e => 0x53, // Latin Capital Letter S With Cedilla
0x015f => 0x73, // Latin Small Letter S With Cedilla
0x0160 => 0x8a, // Latin Capital Letter S With Caron
0x0161 => 0x9a, // Latin Small Letter S With Caron
0x0162 => 0x54, // Latin Capital Letter T With Cedilla
0x0163 => 0x74, // Latin Small Letter T With Cedilla
0x0164 => 0x54, // Latin Capital Letter T With Caron
0x0165 => 0x74, // Latin Small Letter T With Caron
0x0166 => 0x54, // Latin Capital Letter T With Stroke
0x0167 => 0x74, // Latin Small Letter T With Stroke
0x0168 => 0x55, // Latin Capital Letter U With Tilde
0x0169 => 0x75, // Latin Small Letter U With Tilde
0x016a => 0x55, // Latin Capital Letter U With Macron
0x016b => 0x75, // Latin Small Letter U With Macron
0x016c => 0x55, // Latin Capital Letter U With Breve
0x016d => 0x75, // Latin Small Letter U With Breve
0x016e => 0x55, // Latin Capital Letter U With Ring Above
0x016f => 0x75, // Latin Small Letter U With Ring Above
0x0170 => 0x55, // Latin Capital Letter U With Double Acute
0x0171 => 0x75, // Latin Small Letter U With Double Acute
0x0172 => 0x55, // Latin Capital Letter U With Ogonek
0x0173 => 0x75, // Latin Small Letter U With Ogonek
0x0174 => 0x57, // Latin Capital Letter W With Circumflex
0x0175 => 0x77, // Latin Small Letter W With Circumflex
0x0176 => 0x59, // Latin Capital Letter Y With Circumflex
0x0177 => 0x79, // Latin Small Letter Y With Circumflex
0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis
0x0179 => 0x5a, // Latin Capital Letter Z With Acute
0x017a => 0x7a, // Latin Small Letter Z With Acute
0x017b => 0x5a, // Latin Capital Letter Z With Dot Above
0x017c => 0x7a, // Latin Small Letter Z With Dot Above
0x017d => 0x8e, // Latin Capital Letter Z With Caron
0x017e => 0x9e, // Latin Small Letter Z With Caron
0x0180 => 0x62, // Latin Small Letter B With Stroke
0x0189 => 0xd0, // Latin Capital Letter African D
0x0191 => 0x83, // Latin Capital Letter F With Hook
0x0192 => 0x83, // Latin Small Letter F With Hook
0x0197 => 0x49, // Latin Capital Letter I With Stroke
0x019a => 0x6c, // Latin Small Letter L With Bar
0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde
0x01a0 => 0x4f, // Latin Capital Letter O With Horn
0x01a1 => 0x6f, // Latin Small Letter O With Horn
0x01ab => 0x74, // Latin Small Letter T With Palatal Hook
0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook
0x01af => 0x55, // Latin Capital Letter U With Horn
0x01b0 => 0x75, // Latin Small Letter U With Horn
0x01b6 => 0x7a, // Latin Small Letter Z With Stroke
0x01c0 => 0x7c, // Latin Letter Dental Click
0x01c3 => 0x21, // Latin Letter Retroflex Click
0x01cd => 0x41, // Latin Capital Letter A With Caron
0x01ce => 0x61, // Latin Small Letter A With Caron
0x01cf => 0x49, // Latin Capital Letter I With Caron
0x01d0 => 0x69, // Latin Small Letter I With Caron
0x01d1 => 0x4f, // Latin Capital Letter O With Caron
0x01d2 => 0x6f, // Latin Small Letter O With Caron
0x01d3 => 0x55, // Latin Capital Letter U With Caron
0x01d4 => 0x75, // Latin Small Letter U With Caron
0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron
0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron
0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute
0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute
0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron
0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron
0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave
0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave
0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron
0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron
0x01e4 => 0x47, // Latin Capital Letter G With Stroke
0x01e5 => 0x67, // Latin Small Letter G With Stroke
0x01e6 => 0x47, // Latin Capital Letter G With Caron
0x01e7 => 0x67, // Latin Small Letter G With Caron
0x01e8 => 0x4b, // Latin Capital Letter K With Caron
0x01e9 => 0x6b, // Latin Small Letter K With Caron
0x01ea => 0x4f, // Latin Capital Letter O With Ogonek
0x01eb => 0x6f, // Latin Small Letter O With Ogonek
0x01ec => 0x4f, // Latin Capital Letter O With Ogonek And Macron
0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron
0x01f0 => 0x6a, // Latin Small Letter J With Caron
0x0261 => 0x67, // Latin Small Letter Script G
0x02b9 => 0x27, // Modifier Letter Prime
0x02ba => 0x22, // Modifier Letter Double Prime
0x02bc => 0x27, // Modifier Letter Apostrophe
0x02c4 => 0x5e, // Modifier Letter Up Arrowhead
0x02c6 => 0x88, // Modifier Letter Circumflex Accent
0x02c8 => 0x27, // Modifier Letter Vertical Line
0x02c9 => 0xaf, // Modifier Letter Macron
0x02ca => 0xb4, // Modifier Letter Acute Accent
0x02cb => 0x60, // Modifier Letter Grave Accent
0x02cd => 0x5f, // Modifier Letter Low Macron
0x02da => 0xb0, // Ring Above
0x02dc => 0x98, // Small Tilde
0x0300 => 0x60, // Combining Grave Accent
0x0301 => 0xb4, // Combining Acute Accent
0x0302 => 0x5e, // Combining Circumflex Accent
0x0303 => 0x7e, // Combining Tilde
0x0304 => 0xaf, // Combining Macron
0x0305 => 0xaf, // Combining Overline
0x0308 => 0xa8, // Combining Diaeresis
0x030a => 0xb0, // Combining Ring Above
0x030e => 0x22, // Combining Double Vertical Line Above
0x0327 => 0xb8, // Combining Cedilla
0x0331 => 0x5f, // Combining Macron Below
0x0332 => 0x5f, // Combining Low Line
0x037e => 0x3b, // Greek Question Mark
0x0393 => 0x47, // Greek Capital Letter Gamma
0x0398 => 0x54, // Greek Capital Letter Theta
0x03a3 => 0x53, // Greek Capital Letter Sigma
0x03a6 => 0x46, // Greek Capital Letter Phi
0x03a9 => 0x4f, // Greek Capital Letter Omega
0x03b1 => 0x61, // Greek Small Letter Alpha
0x03b2 => 0xdf, // Greek Small Letter Beta
0x03b4 => 0x64, // Greek Small Letter Delta
0x03b5 => 0x65, // Greek Small Letter Epsilon
0x03bc => 0xb5, // Greek Small Letter Mu
0x03c0 => 0x70, // Greek Small Letter Pi
0x03c3 => 0x73, // Greek Small Letter Sigma
0x03c4 => 0x74, // Greek Small Letter Tau
0x03c6 => 0x66, // Greek Small Letter Phi
0x04bb => 0x68, // Cyrillic Small Letter Shha
0x0589 => 0x3a, // Armenian Full Stop
0x066a => 0x25, // Arabic Percent Sign
0x2000 => 0x20, // En Quad
0x2001 => 0x20, // Em Quad
0x2002 => 0x20, // En Space
0x2003 => 0x20, // Em Space
0x2004 => 0x20, // Three-Per-Em Space
0x2005 => 0x20, // Four-Per-Em Space
0x2006 => 0x20, // Six-Per-Em Space
0x2010 => 0x2d, // Hyphen
0x2011 => 0x2d, // Non-Breaking Hyphen
0x2013 => 0x96, // En Dash
0x2014 => 0x97, // Em Dash
0x2017 => 0x3d, // Double Low Line
0x2018 => 0x91, // Left Single Quotation Mark
0x2019 => 0x92, // Right Single Quotation Mark
0x201a => 0x82, // Single Low-9 Quotation Mark
0x201c => 0x93, // Left Double Quotation Mark
0x201d => 0x94, // Right Double Quotation Mark
0x201e => 0x84, // Double Low-9 Quotation Mark
0x2020 => 0x86, // Dagger
0x2021 => 0x87, // Double Dagger
0x2022 => 0x95, // Bullet
0x2024 => 0xb7, // One Dot Leader
0x2026 => 0x85, // Horizontal Ellipsis
0x2030 => 0x89, // Per Mille Sign
0x2032 => 0x27, // Prime
0x2035 => 0x60, // Reversed Prime
0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark
0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark
0x2044 => 0x2f, // Fraction Slash
0x2070 => 0xb0, // Superscript Zero
0x2074 => 0x34, // Superscript Four
0x2075 => 0x35, // Superscript Five
0x2076 => 0x36, // Superscript Six
0x2077 => 0x37, // Superscript Seven
0x2078 => 0x38, // Superscript Eight
0x207f => 0x6e, // Superscript Latin Small Letter N
0x2080 => 0x30, // Subscript Zero
0x2081 => 0x31, // Subscript One
0x2082 => 0x32, // Subscript Two
0x2083 => 0x33, // Subscript Three
0x2084 => 0x34, // Subscript Four
0x2085 => 0x35, // Subscript Five
0x2086 => 0x36, // Subscript Six
0x2087 => 0x37, // Subscript Seven
0x2088 => 0x38, // Subscript Eight
0x2089 => 0x39, // Subscript Nine
0x20ac => 0x80, // Euro Sign
0x20a1 => 0xa2, // Colon Sign
0x20a4 => 0xa3, // Lira Sign
0x20a7 => 0x50, // Peseta Sign
0x2102 => 0x43, // Double-Struck Capital C
0x2107 => 0x45, // Euler Constant
0x210a => 0x67, // Script Small G
0x210b => 0x48, // Script Capital H
0x210c => 0x48, // Black-Letter Capital H
0x210d => 0x48, // Double-Struck Capital H
0x210e => 0x68, // Planck Constant
0x2110 => 0x49, // Script Capital I
0x2111 => 0x49, // Black-Letter Capital I
0x2112 => 0x4c, // Script Capital L
0x2113 => 0x6c, // Script Small L
0x2115 => 0x4e, // Double-Struck Capital N
0x2118 => 0x50, // Script Capital P
0x2119 => 0x50, // Double-Struck Capital P
0x211a => 0x51, // Double-Struck Capital Q
0x211b => 0x52, // Script Capital R
0x211c => 0x52, // Black-Letter Capital R
0x211d => 0x52, // Double-Struck Capital R
0x2122 => 0x99, // Trade Mark Sign
0x2124 => 0x5a, // Double-Struck Capital Z
0x2128 => 0x5a, // Black-Letter Capital Z
0x212a => 0x4b, // Kelvin Sign
0x212b => 0xc5, // Angstrom Sign
0x212c => 0x42, // Script Capital B
0x212d => 0x43, // Black-Letter Capital C
0x212e => 0x65, // Estimated Symbol
0x212f => 0x65, // Script Small E
0x2130 => 0x45, // Script Capital E
0x2131 => 0x46, // Script Capital F
0x2133 => 0x4d, // Script Capital M
0x2134 => 0x6f, // Script Small O
0x2205 => 0xd8, // Empty Set
0x2212 => 0x2d, // Minus Sign
0x2213 => 0xb1, // Minus-Or-Plus Sign
0x2215 => 0x2f, // Division Slash
0x2216 => 0x5c, // Set Minus
0x2217 => 0x2a, // Asterisk Operator
0x2218 => 0xb0, // Ring Operator
0x2219 => 0xb7, // Bullet Operator
0x221a => 0x76, // Square Root
0x221e => 0x38, // Infinity
0x2223 => 0x7c, // Divides
0x2229 => 0x6e, // Intersection
0x2236 => 0x3a, // Ratio
0x223c => 0x7e, // Tilde Operator
0x2248 => 0x98, // Almost Equal To
0x2261 => 0x3d, // Identical To
0x2264 => 0x3d, // Less-Than Or Equal To
0x2265 => 0x3d, // Greater-Than Or Equal To
0x226a => 0xab, // Much Less-Than
0x226b => 0xbb, // Much Greater-Than
0x22c5 => 0xb7, // Dot Operator
0x2302 => 0xa6, // House
0x2303 => 0x5e, // Up Arrowhead
0x2310 => 0xac, // Reversed Not Sign
0x2320 => 0x28, // Top Half Integral
0x2321 => 0x29, // Bottom Half Integral
0x2329 => 0x3c, // Left-Pointing Angle Bracket
0x232a => 0x3e, // Right-Pointing Angle Bracket
0x2500 => 0x2d, // Box Drawings Light Horizontal
0x2502 => 0xa6, // Box Drawings Light Vertical
0x250c => 0x2b, // Box Drawings Light Down And Right
0x2510 => 0x2b, // Box Drawings Light Down And Left
0x2514 => 0x2b, // Box Drawings Light Up And Right
0x2518 => 0x2b, // Box Drawings Light Up And Left
0x251c => 0x2b, // Box Drawings Light Vertical And Right
0x2524 => 0xa6, // Box Drawings Light Vertical And Left
0x252c => 0x2d, // Box Drawings Light Down And Horizontal
0x2534 => 0x2d, // Box Drawings Light Up And Horizontal
0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal
0x2550 => 0x2d, // Box Drawings Double Horizontal
0x2551 => 0xa6, // Box Drawings Double Vertical
0x2552 => 0x2b, // Box Drawings Down Single And Right Double
0x2553 => 0x2b, // Box Drawings Down Double And Right Single
0x2554 => 0x2b, // Box Drawings Double Down And Right
0x2555 => 0x2b, // Box Drawings Down Single And Left Double
0x2556 => 0x2b, // Box Drawings Down Double And Left Single
0x2557 => 0x2b, // Box Drawings Double Down And Left
0x2558 => 0x2b, // Box Drawings Up Single And Right Double
0x2559 => 0x2b, // Box Drawings Up Double And Right Single
0x255a => 0x2b, // Box Drawings Double Up And Right
0x255b => 0x2b, // Box Drawings Up Single And Left Double
0x255c => 0x2b, // Box Drawings Up Double And Left Single
0x255d => 0x2b, // Box Drawings Double Up And Left
0x255e => 0xa6, // Box Drawings Vertical Single And Right Double
0x255f => 0xa6, // Box Drawings Vertical Double And Right Single
0x2560 => 0xa6, // Box Drawings Double Vertical And Right
0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double
0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single
0x2563 => 0xa6, // Box Drawings Double Vertical And Left
0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double
0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single
0x2566 => 0x2d, // Box Drawings Double Down And Horizontal
0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double
0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single
0x2569 => 0x2d, // Box Drawings Double Up And Horizontal
0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double
0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single
0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal
0x2580 => 0xaf, // Upper Half Block
0x2584 => 0x5f, // Lower Half Block
0x2588 => 0xa6, // Full Block
0x258c => 0xa6, // Left Half Block
0x2590 => 0xa6, // Right Half Block
0x2591 => 0xa6, // Light Shade
0x2592 => 0xa6, // Medium Shade
0x2593 => 0xa6, // Dark Shade
0x25a0 => 0xa6, // Black Square
0x263c => 0xa4, // White Sun With Rays
0x2758 => 0x7c, // Light Vertical Bar
0x3000 => 0x20, // Ideographic Space
0x3008 => 0x3c, // Left Angle Bracket
0x3009 => 0x3e, // Right Angle Bracket
0x300a => 0xab, // Left Double Angle Bracket
0x300b => 0xbb, // Right Double Angle Bracket
0x301a => 0x5b, // Left White Square Bracket
0x301b => 0x5d, // Right White Square Bracket
0x30fb => 0xb7, // Katakana Middle Dot
0xff01 => 0x21, // Fullwidth Exclamation Mark
0xff02 => 0x22, // Fullwidth Quotation Mark
0xff03 => 0x23, // Fullwidth Number Sign
0xff04 => 0x24, // Fullwidth Dollar Sign
0xff05 => 0x25, // Fullwidth Percent Sign
0xff06 => 0x26, // Fullwidth Ampersand
0xff07 => 0x27, // Fullwidth Apostrophe
0xff08 => 0x28, // Fullwidth Left Parenthesis
0xff09 => 0x29, // Fullwidth Right Parenthesis
0xff0a => 0x2a, // Fullwidth Asterisk
0xff0b => 0x2b, // Fullwidth Plus Sign
0xff0c => 0x2c, // Fullwidth Comma
0xff0d => 0x2d, // Fullwidth Hyphen-Minus
0xff0e => 0x2e, // Fullwidth Full Stop
0xff0f => 0x2f, // Fullwidth Solidus
0xff10 => 0x30, // Fullwidth Digit Zero
0xff11 => 0x31, // Fullwidth Digit One
0xff12 => 0x32, // Fullwidth Digit Two
0xff13 => 0x33, // Fullwidth Digit Three
0xff14 => 0x34, // Fullwidth Digit Four
0xff15 => 0x35, // Fullwidth Digit Five
0xff16 => 0x36, // Fullwidth Digit Six
0xff17 => 0x37, // Fullwidth Digit Seven
0xff18 => 0x38, // Fullwidth Digit Eight
0xff19 => 0x39, // Fullwidth Digit Nine
0xff1a => 0x3a, // Fullwidth Colon
0xff1b => 0x3b, // Fullwidth Semicolon
0xff1c => 0x3c, // Fullwidth Less-Than Sign
0xff1d => 0x3d, // Fullwidth Equals Sign
0xff1e => 0x3e, // Fullwidth Greater-Than Sign
0xff1f => 0x3f, // Fullwidth Question Mark
0xff20 => 0x40, // Fullwidth Commercial At
0xff21 => 0x41, // Fullwidth Latin Capital Letter A
0xff22 => 0x42, // Fullwidth Latin Capital Letter B
0xff23 => 0x43, // Fullwidth Latin Capital Letter C
0xff24 => 0x44, // Fullwidth Latin Capital Letter D
0xff25 => 0x45, // Fullwidth Latin Capital Letter E
0xff26 => 0x46, // Fullwidth Latin Capital Letter F
0xff27 => 0x47, // Fullwidth Latin Capital Letter G
0xff28 => 0x48, // Fullwidth Latin Capital Letter H
0xff29 => 0x49, // Fullwidth Latin Capital Letter I
0xff2a => 0x4a, // Fullwidth Latin Capital Letter J
0xff2b => 0x4b, // Fullwidth Latin Capital Letter K
0xff2c => 0x4c, // Fullwidth Latin Capital Letter L
0xff2d => 0x4d, // Fullwidth Latin Capital Letter M
0xff2e => 0x4e, // Fullwidth Latin Capital Letter N
0xff2f => 0x4f, // Fullwidth Latin Capital Letter O
0xff30 => 0x50, // Fullwidth Latin Capital Letter P
0xff31 => 0x51, // Fullwidth Latin Capital Letter Q
0xff32 => 0x52, // Fullwidth Latin Capital Letter R
0xff33 => 0x53, // Fullwidth Latin Capital Letter S
0xff34 => 0x54, // Fullwidth Latin Capital Letter T
0xff35 => 0x55, // Fullwidth Latin Capital Letter U
0xff36 => 0x56, // Fullwidth Latin Capital Letter V
0xff37 => 0x57, // Fullwidth Latin Capital Letter W
0xff38 => 0x58, // Fullwidth Latin Capital Letter X
0xff39 => 0x59, // Fullwidth Latin Capital Letter Y
0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z
0xff3b => 0x5b, // Fullwidth Left Square Bracket
0xff3c => 0x5c, // Fullwidth Reverse Solidus
0xff3d => 0x5d, // Fullwidth Right Square Bracket
0xff3e => 0x5e, // Fullwidth Circumflex Accent
0xff3f => 0x5f, // Fullwidth Low Line
0xff40 => 0x60, // Fullwidth Grave Accent
0xff41 => 0x61, // Fullwidth Latin Small Letter A
0xff42 => 0x62, // Fullwidth Latin Small Letter B
0xff43 => 0x63, // Fullwidth Latin Small Letter C
0xff44 => 0x64, // Fullwidth Latin Small Letter D
0xff45 => 0x65, // Fullwidth Latin Small Letter E
0xff46 => 0x66, // Fullwidth Latin Small Letter F
0xff47 => 0x67, // Fullwidth Latin Small Letter G
0xff48 => 0x68, // Fullwidth Latin Small Letter H
0xff49 => 0x69, // Fullwidth Latin Small Letter I
0xff4a => 0x6a, // Fullwidth Latin Small Letter J
0xff4b => 0x6b, // Fullwidth Latin Small Letter K
0xff4c => 0x6c, // Fullwidth Latin Small Letter L
0xff4d => 0x6d, // Fullwidth Latin Small Letter M
0xff4e => 0x6e, // Fullwidth Latin Small Letter N
0xff4f => 0x6f, // Fullwidth Latin Small Letter O
0xff50 => 0x70, // Fullwidth Latin Small Letter P
0xff51 => 0x71, // Fullwidth Latin Small Letter Q
0xff52 => 0x72, // Fullwidth Latin Small Letter R
0xff53 => 0x73, // Fullwidth Latin Small Letter S
0xff54 => 0x74, // Fullwidth Latin Small Letter T
0xff55 => 0x75, // Fullwidth Latin Small Letter U
0xff56 => 0x76, // Fullwidth Latin Small Letter V
0xff57 => 0x77, // Fullwidth Latin Small Letter W
0xff58 => 0x78, // Fullwidth Latin Small Letter X
0xff59 => 0x79, // Fullwidth Latin Small Letter Y
0xff5a => 0x7a, // Fullwidth Latin Small Letter Z
0xff5b => 0x7b, // Fullwidth Left Curly Bracket
0xff5c => 0x7c, // Fullwidth Vertical Line
0xff5d => 0x7d, // Fullwidth Right Curly Bracket
0xff5e => 0x7e, // Fullwidth Tilde
// Not in the best fit mapping, but RC uses these mappings too
0x2007 => 0xA0, // Figure Space
0x2008 => ' ', // Punctuation Space
0x2009 => ' ', // Thin Space
0x200A => ' ', // Hair Space
0x2012 => '-', // Figure Dash
0x2015 => '-', // Horizontal Bar
0x201B => '\'', // Single High-reversed-9 Quotation Mark
0x201F => '"', // Double High-reversed-9 Quotation Mark
0x202F => 0xA0, // Narrow No-Break Space
0x2033 => '"', // Double Prime
0x2036 => '"', // Reversed Double Prime
// Everything else has no mapping.
else => null,
};
}
// Converts a Windows-1252 byte string (ASCII, 0x81, and the bytes
// 0x80...0xFF that are exercised here) to UTF-8 and checks both the produced
// bytes and the byte count returned by `windows1252ToUtf8Stream`.
test "windows-1252 to utf8" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();

    const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
    // One UTF-8 character per input byte, in the same order. In particular the
    // input bytes \xd7\xd8 correspond to the two characters U+00D7 (multiplication
    // sign) and U+00D8 (capital O with stroke).
    const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";

    var fbs = std.io.fixedBufferStream(input_windows1252);
    const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader());

    try std.testing.expectEqualStrings(expected_utf8, buf.items);
    try std.testing.expectEqual(expected_utf8.len, bytes_written);
}

View File

@ -194,6 +194,10 @@ pub const build_cases = [_]BuildCase{
.build_root = "test/standalone/load_dynamic_library",
.import = @import("standalone/load_dynamic_library/build.zig"),
},
.{
.build_root = "test/standalone/windows_resources",
.import = @import("standalone/windows_resources/build.zig"),
},
.{
.build_root = "test/standalone/windows_spawn",
.import = @import("standalone/windows_spawn/build.zig"),

View File

@ -0,0 +1,40 @@
const std = @import("std");
/// Build script entry point: creates the `test` step, makes it the default
/// step, and registers the resource-file executable for every combination of
/// target (native, x86_64-windows-gnu) and `rc_includes` mode (`.any`, `.gnu`).
pub fn build(b: *std.Build) void {
    const check_step = b.step("test", "Test it");
    b.default_step = check_step;

    // Native target first, then the explicit Windows cross target.
    const targets = [_]std.zig.CrossTarget{
        .{},
        .{
            .cpu_arch = .x86_64,
            .os_tag = .windows,
            .abi = .gnu,
        },
    };

    // Registration order: native/any, cross/any, native/gnu, cross/gnu.
    for (targets) |target| add(b, target, .any, check_step);
    for (targets) |target| add(b, target, .gnu, check_step);
}
/// Registers one `zig_resource_test` executable and hooks it into `test_step`.
///
/// The executable is built from `main.zig` in Debug mode for `target`, with
/// `res/zig.rc` attached as a Win32 resource file, and with its `rc_includes`
/// field set to the variant matching the `rc_includes` argument.
fn add(b: *std.Build, target: std.zig.CrossTarget, rc_includes: enum { any, gnu }, test_step: *std.Build.Step) void {
    const artifact = b.addExecutable(.{
        .name = "zig_resource_test",
        .root_source_file = .{ .path = "main.zig" },
        .target = target,
        .optimize = .Debug,
    });

    // `/c65001` selects the UTF-8 code page for the resource script.
    artifact.addWin32ResourceFile(.{
        .file = .{ .path = "res/zig.rc" },
        .flags = &.{"/c65001"},
    });

    // Translate the local two-variant enum into the field's own enum type.
    artifact.rc_includes = switch (rc_includes) {
        .gnu => .gnu,
        .any => .any,
    };

    // The returned path is unused; presumably the call is what requests that
    // the binary be emitted at all.
    _ = artifact.getEmittedBin();

    test_step.dependOn(&artifact.step);
}

View File

@ -0,0 +1,5 @@
const std = @import("std");
/// Program entry point: prints a single fixed line via `std.debug.print`.
pub fn main() !void {
    const subject = "codebase";
    std.debug.print("All your {s} are belong to us.\n", .{subject});
}

View File

@ -0,0 +1 @@
abcdefg

View File

@ -0,0 +1 @@
// RCDATA (raw data) resource with ID 2; its contents are read from hello.bin.
2 RCDATA hello.bin

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 KiB

View File

@ -0,0 +1,40 @@
// Numeric ID used for the icon resource declared below.
#define ICO_ID 1
// Nothing from windows.h is used in this .rc file,
// but it's common to include it within a .rc file
// so this makes sure that it can be found on
// all platforms.
#include "windows.h"
// Icon resource: the ID comes from the ICO_ID macro, the image data from zig.ico.
ICO_ID ICON "zig.ico"
// Version-information resource with ID 1.
// The StringFileInfo block name "040904e4" is a language/code-page pair in hex:
// 0x0409 and 0x04e4 (1252 in decimal), the same pair that the "Translation"
// value in the VarFileInfo block spells out as 0x409, 1252.
// NOTE(review): FILEVERSION is 1,0,0,2 (first component written with an L suffix)
// while PRODUCTVERSION and the "FileVersion" string say 1.0.0.1. This is presumably
// deliberate for a test fixture; confirm before changing it.
1 VERSIONINFO
FILEVERSION 1L,0,0,2
PRODUCTVERSION 1,0,0,1
FILEFLAGSMASK 0x3fL
FILEFLAGS 0x1L
FILEOS 0x4L
FILETYPE 0x1L
FILESUBTYPE 0x0L
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "040904e4"
BEGIN
VALUE "CompanyName", "Zig"
VALUE "FileDescription", "My cool zig program"
VALUE "FileVersion", "1.0.0.1"
VALUE "InternalName", "zig-ico.exe"
VALUE "LegalCopyright", "(c) no one"
VALUE "OriginalFilename", "zig-ico.exe"
VALUE "ProductName", "Zig but with an icon"
VALUE "ProductVersion", "1.0.0.1"
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x409, 1252
END
END
#include "sub/sub.rc"