Merge pull request #17608 from squeek502/resinator-fixes

resinator: Fix `INCLUDE` var handling and sync with upstream
2026-03-11 21:59:52 +00:00 · 2023-10-20 03:49:14 -04:00 · 2023-10-20 03:49:14 -04:00 · a361f37b1c
commit a361f37b1c
parent db18b562ac 8ec04b567e
9 changed files with 128 additions and 79 deletions
--- a/lib/std/Build/Step/Compile.zig
+++ b/lib/std/Build/Step/Compile.zig
@ -244,6 +244,8 @@ pub const RcSourceFile = struct {
    file: LazyPath,
    /// Any option that rc.exe accepts will work here, with the exception of:
    /// - `/fo`: The output filename is set by the build system
+    /// - `/p`: Only running the preprocessor is not supported in this context
+    /// - `/:no-preprocess` (non-standard option): Not supported in this context
    /// - Any MUI-related option
    /// https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line-
    ///
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@ -4766,11 +4766,21 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
        };
        defer options.deinit();

+        // We never want to read the INCLUDE environment variable, so
+        // unconditionally set `ignore_include_env_var` to true
+        options.ignore_include_env_var = true;
+
+        if (options.preprocess != .yes) {
+            return comp.failWin32Resource(win32_resource, "the '{s}' option is not supported in this context", .{switch (options.preprocess) {
+                .no => "/:no-preprocess",
+                .only => "/p",
+                .yes => unreachable,
+            }});
+        }
+
        var argv = std.ArrayList([]const u8).init(comp.gpa);
        defer argv.deinit();

-        // TODO: support options.preprocess == .no and .only
-        //       alternatively, error if those options are used
        try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" });

        try resinator.preprocess.appendClangArgs(arena, &argv, options, .{
--- a/src/introspect.zig
+++ b/src/introspect.zig
@ -157,8 +157,6 @@ pub const EnvVar = enum {
    NO_COLOR,
    XDG_CACHE_HOME,
    HOME,
-    /// https://github.com/ziglang/zig/issues/17585
-    INCLUDE,

    pub fn isSet(comptime ev: EnvVar) bool {
        return std.process.hasEnvVarConstant(@tagName(ev));
--- a/src/resinator/compile.zig
+++ b/src/resinator/compile.zig
@ -28,7 +28,6 @@ const windows1252 = @import("windows1252.zig");
 const lang = @import("lang.zig");
 const code_pages = @import("code_pages.zig");
 const errors = @import("errors.zig");
-const introspect = @import("../introspect.zig");

 pub const CompileOptions = struct {
    cwd: std.fs.Dir,
@ -89,10 +88,23 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
        }
    }
    // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
-    // `catch unreachable` since `options.cwd` is expected to be a valid dir handle, so opening
-    // a new handle to it should be fine as well.
-    // TODO: Maybe catch and return an error instead
-    const cwd_dir = options.cwd.openDir(".", .{}) catch @panic("unable to open dir");
+    const cwd_dir = options.cwd.openDir(".", .{}) catch |err| {
+        try options.diagnostics.append(.{
+            .err = .failed_to_open_cwd,
+            .token = .{
+                .id = .invalid,
+                .start = 0,
+                .end = 0,
+                .line_number = 1,
+            },
+            .print_source_line = false,
+            .extra = .{ .file_open_error = .{
+                .err = ErrorDetails.FileOpenError.enumFromError(err),
+                .filename_string_index = undefined,
+            } },
+        });
+        return error.CompileError;
+    };
    try search_dirs.append(.{ .dir = cwd_dir, .path = null });
    for (options.extra_include_paths) |extra_include_path| {
        var dir = openSearchPathDir(options.cwd, extra_include_path) catch {
@ -111,11 +123,16 @@ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, option
        try search_dirs.append(.{ .dir = dir, .path = try allocator.dupe(u8, system_include_path) });
    }
    if (!options.ignore_include_env_var) {
-        const INCLUDE = (introspect.EnvVar.INCLUDE.get(allocator) catch @panic("OOM")) orelse "";
+        const INCLUDE = std.process.getEnvVarOwned(allocator, "INCLUDE") catch "";
        defer allocator.free(INCLUDE);

-        // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
-        var it = std.mem.tokenize(u8, INCLUDE, ";");
+        // The only precedence here is llvm-rc which also uses the platform-specific
+        // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+        const delimiter = switch (builtin.os.tag) {
+            .windows => ';',
+            else => ':',
+        };
+        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
        while (it.next()) |search_path| {
            var dir = openSearchPathDir(options.cwd, search_path) catch continue;
            errdefer dir.close();
--- a/src/resinator/errors.zig
+++ b/src/resinator/errors.zig
@ -395,6 +395,10 @@ pub const ErrorDetails = struct {
        // General (used in various places)
        /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
        win32_non_ascii_ordinal,
+
+        // Initialization
+        /// `file_open_error` is populated, but `filename_string_index` is not
+        failed_to_open_cwd,
    };

    pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
@ -766,6 +770,9 @@ pub const ErrorDetails = struct {
                .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
                .hint => return,
            },
+            .failed_to_open_cwd => {
+                try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)});
+            },
        }
    }

@ -804,7 +811,8 @@ pub const ErrorDetails = struct {
                .point_offset = self.token.start - source_line_start,
                .after_len = after: {
                    const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end);
-                    if (end == self.token.start) break :after 0;
+                    // end may be less than start when pointing to EOF
+                    if (end <= self.token.start) break :after 0;
                    break :after end - self.token.start - 1;
                },
            },
@ -816,13 +824,18 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
    if (err_details.type == .hint) return;

    const source_line_start = err_details.token.getLineStart(source);
-    const column = err_details.token.calculateColumn(source, 1, source_line_start);
+    // Treat tab stops as 1 column wide for error display purposes,
+    // and add one to get a 1-based column
+    const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;

-    // var counting_writer_container = std.io.countingWriter(writer);
-    // const counting_writer = counting_writer_container.writer();
-
-    const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings) |mappings| mappings.get(err_details.token.line_number) else null;
-    const corresponding_file: ?[]const u8 = if (source_mappings) |mappings| mappings.files.get(corresponding_span.?.filename_offset) else null;
+    const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings != null and source_mappings.?.has(err_details.token.line_number))
+        source_mappings.?.get(err_details.token.line_number)
+    else
+        null;
+    const corresponding_file: ?[]const u8 = if (source_mappings != null and corresponding_span != null)
+        source_mappings.?.files.get(corresponding_span.?.filename_offset)
+    else
+        null;

    const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number;

@ -897,7 +910,7 @@ pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_con
    try writer.writeByte('\n');
    try tty_config.setColor(writer, .reset);

-    if (source_mappings) |_| {
+    if (corresponding_span != null and corresponding_file != null) {
        var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?);
        defer corresponding_lines.deinit(allocator);

--- a/src/resinator/lex.zig
+++ b/src/resinator/lex.zig
@ -6,7 +6,7 @@

 const std = @import("std");
 const ErrorDetails = @import("errors.zig").ErrorDetails;
-const columnsUntilTabStop = @import("literals.zig").columnsUntilTabStop;
+const columnWidth = @import("literals.zig").columnWidth;
 const code_pages = @import("code_pages.zig");
 const CodePage = code_pages.CodePage;
 const SourceMappings = @import("source_mapping.zig").SourceMappings;
@ -69,17 +69,14 @@ pub const Token = struct {
        };
    }

+    /// Returns 0-based column
    pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
        const line_start = maybe_line_start orelse token.getLineStart(source);

        var i: usize = line_start;
        var column: usize = 0;
        while (i < token.start) : (i += 1) {
-            const c = source[i];
-            switch (c) {
-                '\t' => column += columnsUntilTabStop(column, tab_columns),
-                else => column += 1,
-            }
+            column += columnWidth(column, source[i], tab_columns);
        }
        return column;
    }
@ -109,6 +106,7 @@ pub const Token = struct {
        const line_start = maybe_line_start orelse token.getLineStart(source);

        var line_end = line_start + 1;
+        if (line_end >= source.len or source[line_end] == '\n') return source[line_start..line_start];
        while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
        while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {}

@ -404,6 +402,9 @@ pub const Lexer = struct {
        // TODO: Understand this more, bring it more in line with how the Win32 limits work.
        //       Alternatively, do something that makes more sense but may be more permissive.
        var string_literal_length: usize = 0;
+        // Keeping track of the string literal column prevents pathological edge cases when
+        // there are tons of tab stop characters within a string literal.
+        var string_literal_column: usize = 0;
        var string_literal_collapsing_whitespace: bool = false;
        var still_could_have_exponent: bool = true;
        var exponent_index: ?usize = null;
@ -471,6 +472,14 @@ pub const Lexer = struct {
                        self.at_start_of_line = false;
                        string_literal_collapsing_whitespace = false;
                        string_literal_length = 0;
+
+                        var dummy_token = Token{
+                            .start = self.index,
+                            .end = self.index,
+                            .line_number = self.line_handler.line_number,
+                            .id = .invalid,
+                        };
+                        string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
                    },
                    '+', '&', '|' => {
                        self.index += 1;
@ -618,6 +627,14 @@ pub const Lexer = struct {
                        state = .quoted_wide_string;
                        string_literal_collapsing_whitespace = false;
                        string_literal_length = 0;
+
+                        var dummy_token = Token{
+                            .start = self.index,
+                            .end = self.index,
+                            .line_number = self.line_handler.line_number,
+                            .id = .invalid,
+                        };
+                        string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
                    },
                    else => {
                        state = .literal;
@ -695,18 +712,23 @@ pub const Lexer = struct {
                },
                .quoted_ascii_string, .quoted_wide_string => switch (c) {
                    '"' => {
+                        string_literal_column += 1;
                        state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
                    },
                    '\\' => {
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                        state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
                    },
                    '\r' => {
+                        string_literal_column = 0;
                        // \r doesn't count towards string literal length

                        // Increment line number but don't affect the result token's line number
                        _ = self.incrementLineNumber();
                    },
                    '\n' => {
+                        string_literal_column = 0;
                        // first \n expands to <space><\n>
                        if (!string_literal_collapsing_whitespace) {
                            string_literal_length += 2;
@ -720,33 +742,17 @@ pub const Lexer = struct {
                    // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
                    '\t', ' ', '\x0b', '\x0c' => {
                        if (!string_literal_collapsing_whitespace) {
-                            if (c == '\t') {
-                                // Literal tab characters are counted as the number of space characters
-                                // needed to reach the next 8-column tab stop.
-                                //
-                                // This implemention is ineffecient but hopefully it's enough of an
-                                // edge case that it doesn't matter too much. Literal tab characters in
-                                // string literals being replaced by a variable number of spaces depending
-                                // on which column the tab character is located in the source .rc file seems
-                                // like it has extremely limited use-cases, so it seems unlikely that it's used
-                                // in real .rc files.
-                                var dummy_token = Token{
-                                    .start = self.index,
-                                    .end = self.index,
-                                    .line_number = self.line_handler.line_number,
-                                    .id = .invalid,
-                                };
-                                dummy_token.start = self.index;
-                                const current_column = dummy_token.calculateColumn(self.buffer, 8, null);
-                                string_literal_length += columnsUntilTabStop(current_column, 8);
-                            } else {
-                                string_literal_length += 1;
-                            }
+                            // Literal tab characters are counted as the number of space characters
+                            // needed to reach the next 8-column tab stop.
+                            const width = columnWidth(string_literal_column, @intCast(c), 8);
+                            string_literal_length += width;
+                            string_literal_column += width;
                        }
                    },
                    else => {
                        string_literal_collapsing_whitespace = false;
                        string_literal_length += 1;
+                        string_literal_column += 1;
                    },
                },
                .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
@ -760,14 +766,19 @@ pub const Lexer = struct {
                        return error.FoundCStyleEscapedQuote;
                    },
                    else => {
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                        state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
                    },
                },
                .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
                    '"' => {
                        state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
-                        // Escaped quotes only count as 1 char for string literal length checks,
-                        // so we don't increment string_literal_length here.
+                        // Escaped quotes count as 1 char for string literal length checks.
+                        // Since we did not increment on the first " (because it could have been
+                        // the end of the quoted string), we increment here
+                        string_literal_length += 1;
+                        string_literal_column += 1;
                    },
                    else => {
                        result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
@ -807,6 +818,8 @@ pub const Lexer = struct {
            }
        }

+        result.end = self.index;
+
        if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
            if (string_literal_length > self.max_string_literal_codepoints) {
                self.error_context_token = result;
@ -814,7 +827,6 @@ pub const Lexer = struct {
            }
        }

-        result.end = self.index;
        return result;
    }

@ -877,6 +889,7 @@ pub const Lexer = struct {
            .end = end,
            .line_number = self.line_handler.line_number,
        };
+        errdefer self.error_context_token = token;
        const full_command = self.buffer[start..end];
        var command = full_command;

@ -901,7 +914,6 @@ pub const Lexer = struct {
        }

        if (command.len == 0 or command[0] != '(') {
-            self.error_context_token = token;
            return error.CodePagePragmaMissingLeftParen;
        }
        command = command[1..];
@ -917,7 +929,6 @@ pub const Lexer = struct {
        }

        if (num_str.len == 0) {
-            self.error_context_token = token;
            return error.CodePagePragmaNotInteger;
        }

@ -926,7 +937,6 @@ pub const Lexer = struct {
        }

        if (command.len == 0 or command[0] != ')') {
-            self.error_context_token = token;
            return error.CodePagePragmaMissingRightParen;
        }

@ -943,41 +953,26 @@ pub const Lexer = struct {
            //
            // Instead of that, we just have a separate error specifically for overflow.
            const num = parseCodePageNum(num_str) catch |err| switch (err) {
-                error.InvalidCharacter => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaNotInteger;
-                },
-                error.Overflow => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaOverflow;
-                },
+                error.InvalidCharacter => return error.CodePagePragmaNotInteger,
+                error.Overflow => return error.CodePagePragmaOverflow,
            };

            // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
            if (num_str[0] == '0' and num != 0) {
-                self.error_context_token = token;
                return error.CodePagePragmaInvalidCodePage;
            }
            // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
            else if (num == 0) {
-                self.error_context_token = token;
                return error.CodePagePragmaNotInteger;
            }
            // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
            if (num > std.math.maxInt(u16)) {
-                self.error_context_token = token;
                return error.CodePagePragmaInvalidCodePage;
            }

            break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
-                error.InvalidCodePage => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaInvalidCodePage;
-                },
-                error.UnsupportedCodePage => {
-                    self.error_context_token = token;
-                    return error.CodePagePragmaUnsupportedCodePage;
-                },
+                error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
+                error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
            };
        };

@ -990,7 +985,6 @@ pub const Lexer = struct {
        // to still be able to work correctly after this error is returned.
        if (self.source_mappings) |source_mappings| {
            if (!source_mappings.isRootFile(token.line_number)) {
-                self.error_context_token = token;
                return error.CodePagePragmaInIncludedFile;
            }
        }
--- a/src/resinator/literals.zig
+++ b/src/resinator/literals.zig
@ -775,6 +775,13 @@ pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
    return tab_columns - (column % tab_columns);
 }

+pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
+    return switch (c) {
+        '\t' => columnsUntilTabStop(cur_column, tab_columns),
+        else => 1,
+    };
+}
+
 pub const Number = struct {
    value: u32,
    is_long: bool = false,
--- a/src/resinator/preprocess.zig
+++ b/src/resinator/preprocess.zig
@ -1,7 +1,7 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const Allocator = std.mem.Allocator;
 const cli = @import("cli.zig");
-const introspect = @import("../introspect.zig");

 pub const IncludeArgs = struct {
    clang_target: ?[]const u8 = null,
@ -68,10 +68,15 @@ pub fn appendClangArgs(arena: Allocator, argv: *std.ArrayList([]const u8), optio
    }

    if (!options.ignore_include_env_var) {
-        const INCLUDE = (introspect.EnvVar.INCLUDE.get(arena) catch @panic("OOM")) orelse "";
+        const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch "";

-        // TODO: Should this be platform-specific? How does windres/llvm-rc handle this (if at all)?
-        var it = std.mem.tokenize(u8, INCLUDE, ";");
+        // The only precedence here is llvm-rc which also uses the platform-specific
+        // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
+        const delimiter = switch (builtin.os.tag) {
+            .windows => ';',
+            else => ':',
+        };
+        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
        while (it.next()) |include_path| {
            try argv.append("-isystem");
            try argv.append(include_path);
--- a/src/resinator/source_mapping.zig
+++ b/src/resinator/source_mapping.zig
@ -240,6 +240,9 @@ pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current
    };
    defer allocator.free(filename);

+    // \x00 bytes in the filename is incompatible with how StringTable works
+    if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;
+
    current_mapping.line_num = linenum;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, filename);
@ -441,7 +444,7 @@ pub const SourceMappings = struct {
        ptr.* = span;
    }

-    pub fn has(self: *SourceMappings, line_num: usize) bool {
+    pub fn has(self: SourceMappings, line_num: usize) bool {
        return self.mapping.items.len >= line_num;
    }