From 4d9c4ab82c61a5c6f6e120924941d2b22fd15dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20=C3=85stholm?= Date: Mon, 18 Dec 2023 23:08:27 +0100 Subject: [PATCH] More accurate argv-to-command-line serialization when spawning child processes on Windows The old implementation had a bug in it in that it didn't quote empty strings, but it also didn't properly follow the special quoting rules required for the first argument (the executable name). This new implementation serializes the argv correctly such that it can be parsed by the `CommandLineToArgvW` algorithm. --- lib/std/child_process.zig | 181 +++++++++++++++++++++++++++++++------- 1 file changed, 151 insertions(+), 30 deletions(-) diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig index ccf3c2428c..444adebbdc 100644 --- a/lib/std/child_process.zig +++ b/lib/std/child_process.zig @@ -744,9 +744,6 @@ pub const ChildProcess = struct { windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr); }; - const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv); - defer self.allocator.free(cmd_line); - var siStartInfo = windows.STARTUPINFOW{ .cb = @sizeOf(windows.STARTUPINFOW), .hStdError = g_hChildStd_ERR_Wr, @@ -818,7 +815,11 @@ pub const ChildProcess = struct { const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8); defer self.allocator.free(app_name_w); - const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line); + const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) { + // argv[0] contains unsupported characters that will never resolve to a valid exe. + error.InvalidArg0 => return error.FileNotFound, + else => |e| return e, + }; defer self.allocator.free(cmd_line_w); run: { @@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" { try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null); } -/// Caller must dealloc. -fn windowsCreateCommandLine(allocator: mem.Allocator, argv: []const []const u8) ![:0]u8 { +pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 }; + +/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and +/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice. +pub fn argvToCommandLineWindows( + allocator: mem.Allocator, + argv: []const []const u8, +) ArgvToCommandLineError![:0]u16 { var buf = std.ArrayList(u8).init(allocator); defer buf.deinit(); - for (argv, 0..) |arg, arg_i| { - if (arg_i != 0) try buf.append(' '); - if (mem.indexOfAny(u8, arg, " \t\n\"") == null) { - try buf.appendSlice(arg); - continue; - } - try buf.append('"'); - var backslash_count: usize = 0; - for (arg) |byte| { - switch (byte) { - '\\' => backslash_count += 1, - '"' => { - try buf.appendNTimes('\\', backslash_count * 2 + 1); - try buf.append('"'); - backslash_count = 0; - }, - else => { - try buf.appendNTimes('\\', backslash_count); - try buf.append(byte); - backslash_count = 0; - }, + if (argv.len != 0) { + const arg0 = argv[0]; + + // The first argument must be quoted if it contains spaces or ASCII control characters + // (excluding DEL). It also follows special quoting rules where backslashes have no special + // interpretation, which makes it impossible to pass certain first arguments containing + // double quotes to a child process without characters from the first argument leaking into + // subsequent ones (which could have security implications). + // + // Empty arguments technically don't need quotes, but we quote them anyway for maximum + // compatibility with different implementations of the 'CommandLineToArgvW' algorithm. + // + // Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject + // all first arguments containing double quotes, even ones that we could theoretically + // serialize in unquoted form. + var needs_quotes = arg0.len == 0; + for (arg0) |c| { + if (c <= ' ') { + needs_quotes = true; + } else if (c == '"') { + return error.InvalidArg0; } } - try buf.appendNTimes('\\', backslash_count * 2); - try buf.append('"'); + if (needs_quotes) { + try buf.append('"'); + try buf.appendSlice(arg0); + try buf.append('"'); + } else { + try buf.appendSlice(arg0); + } + + for (argv[1..]) |arg| { + try buf.append(' '); + + // Subsequent arguments must be quoted if they contain spaces, tabs or double quotes, + // or if they are empty. For simplicity and for maximum compatibility with different + // implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII + // control characters (again, excluding DEL). + needs_quotes = for (arg) |c| { + if (c <= ' ' or c == '"') { + break true; + } + } else arg.len == 0; + if (!needs_quotes) { + try buf.appendSlice(arg); + continue; + } + + try buf.append('"'); + var backslash_count: usize = 0; + for (arg) |byte| { + switch (byte) { + '\\' => { + backslash_count += 1; + }, + '"' => { + try buf.appendNTimes('\\', backslash_count * 2 + 1); + try buf.append('"'); + backslash_count = 0; + }, + else => { + try buf.appendNTimes('\\', backslash_count); + try buf.append(byte); + backslash_count = 0; + }, + } + } + try buf.appendNTimes('\\', backslash_count * 2); + try buf.append('"'); + } } - return buf.toOwnedSliceSentinel(0); + return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items); +} + +test "argvToCommandLineWindows" { + const t = testArgvToCommandLineWindows; + + try t(&.{ + \\C:\Program Files\zig\zig.exe + , + \\run + , + \\.\src\main.zig + , + \\-target + , + \\x86_64-windows-gnu + , + \\-O + , + \\ReleaseSafe + , + \\-- + , + \\--emoji=🗿 + , + \\--eval=new Regex("Dwayne \"The Rock\" Johnson") + , + }, + \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")" + ); + + try t(&.{}, ""); + try t(&.{""}, "\"\""); + try t(&.{" "}, "\" \""); + try t(&.{"\t"}, "\"\t\""); + try t(&.{"\x07"}, "\"\x07\""); + try t(&.{"🦎"}, "🦎"); + + try t( + &.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" }, + "zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee", + ); + + try t( + &.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" }, + "\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"", + ); + + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}), + ); + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}), + ); + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}), + ); +} + +fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void { + const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv); + defer std.testing.allocator.free(cmd_line_w); + + const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w); + defer std.testing.allocator.free(cmd_line); + + try std.testing.expectEqualStrings(expected_cmd_line, cmd_line); } fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {