mirror of
https://github.com/ziglang/zig.git
synced 2026-01-20 22:35:24 +00:00
Merge pull request #19655 from squeek502/windows-argv-post-2008
ArgIteratorWindows: Match post-2008 C runtime rather than `CommandLineToArgvW`
This commit is contained in:
commit
b78b2689ed
@ -625,11 +625,22 @@ pub const ArgIteratorWasi = struct {
|
||||
};
|
||||
|
||||
/// Iterator that implements the Windows command-line parsing algorithm.
|
||||
/// The implementation is intended to be compatible with the post-2008 C runtime,
|
||||
/// but is *not* intended to be compatible with `CommandLineToArgvW` since
|
||||
/// `CommandLineToArgvW` uses the pre-2008 parsing rules.
|
||||
///
|
||||
/// This iterator faithfully implements the parsing behavior observed in `CommandLineToArgvW` with
|
||||
/// This iterator faithfully implements the parsing behavior observed from the C runtime with
|
||||
/// one exception: if the command-line string is empty, the iterator will immediately complete
|
||||
/// without returning any arguments (whereas `CommandLineArgvW` will return a single argument
|
||||
/// without returning any arguments (whereas the C runtime will return a single argument
|
||||
/// representing the name of the current executable).
|
||||
///
|
||||
/// The essential parts of the algorithm are described in Microsoft's documentation:
|
||||
///
|
||||
/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
|
||||
///
|
||||
/// David Deley explains some additional undocumented quirks in great detail:
|
||||
///
|
||||
/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
|
||||
pub const ArgIteratorWindows = struct {
|
||||
allocator: Allocator,
|
||||
/// Owned by the iterator.
|
||||
@ -686,6 +697,51 @@ pub const ArgIteratorWindows = struct {
|
||||
fn emitCharacter(self: *ArgIteratorWindows, char: u8) void {
    const write_pos = self.end;
    self.buffer[write_pos] = char;
    self.end = write_pos + 1;

    // The argument is built up from WTF-8 one byte at a time, so the
    // byte just written may have completed the second half of a
    // surrogate pair whose first half was emitted earlier. Well-formed
    // WTF-8 never contains a high surrogate directly followed by a low
    // surrogate (https://simonsapin.github.io/wtf-8/#concatenating);
    // such a pair must instead be stored as the UTF-8 encoding of the
    // single codepoint it represents.
    //
    // Example: the WTF-16 command line
    //   "<0xD801>"<0xDC37>
    // is converted to WTF-8 in `cmd_line` as
    //   "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
    // Once the quotes drop out during parsing, emitting the bytes
    // unchanged would produce
    //   <0xED><0xA0><0x81><0xED><0xB0><0xB7>
    // i.e. the two surrogates encoded separately. The pair actually
    // encodes U+10437 (𐐷), whose UTF-8 form is
    //   <0xF0><0x90><0x90><0xB7>
    // so that is what has to end up in the buffer.
    concatSurrogatePair(self);
}
|
||||
|
||||
/// If the last 6 bytes of the argument being built are a WTF-8 encoded
/// high surrogate followed by a low surrogate, replaces them in place with
/// the UTF-8 encoding of the codepoint the pair represents and shrinks
/// `self.end` accordingly. Otherwise leaves the buffer untouched.
fn concatSurrogatePair(self: *ArgIteratorWindows) void {
    // A surrogate codepoint always takes 3 bytes in WTF-8, so a pair
    // needs 6 bytes of already-emitted argument data.
    const pair_len = 6;
    if (self.end - self.start < pair_len) return;

    const tail = self.buffer[self.end - pair_len .. self.end];
    // The tail may begin in the middle of a codepoint, in which case it
    // cannot be a surrogate pair.
    const view = std.unicode.Wtf8View.init(tail) catch return;
    var codepoints = view.iterator();

    const high = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
    if (!std.unicode.utf16IsHighSurrogate(high)) return;
    const low = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
    if (!std.unicode.utf16IsLowSurrogate(low)) return;

    // A valid surrogate pair: re-encode it as UTF-8 over the same bytes
    // and drop whatever is left over at the end.
    const pair_le = [2]u16{
        std.mem.nativeToLittle(u16, high),
        std.mem.nativeToLittle(u16, low),
    };
    const utf8_len = std.unicode.utf16LeToUtf8(tail, &pair_le) catch unreachable;
    self.end -= pair_len - utf8_len;
}
|
||||
|
||||
fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 {
|
||||
@ -711,69 +767,37 @@ pub const ArgIteratorWindows = struct {
|
||||
}
|
||||
};
|
||||
|
||||
// The essential parts of the algorithm are described in Microsoft's documentation:
|
||||
//
|
||||
// - <https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments>
|
||||
// - <https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw>
|
||||
//
|
||||
// David Deley explains some additional undocumented quirks in great detail:
|
||||
//
|
||||
// - <https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES>
|
||||
//
|
||||
// Code points <= U+0020 terminating an unquoted first argument was discovered independently by
|
||||
// testing and observing the behavior of 'CommandLineToArgvW' on Windows 10.
|
||||
|
||||
fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T {
|
||||
// The first argument (the executable name) uses different parsing rules.
|
||||
if (self.index == 0) {
|
||||
var char = if (self.cmd_line.len != 0) self.cmd_line[0] else 0;
|
||||
switch (char) {
|
||||
0 => {
|
||||
// Immediately complete the iterator.
|
||||
// 'CommandLineToArgvW' would return the name of the current executable here.
|
||||
return strategy.eof;
|
||||
},
|
||||
'"' => {
|
||||
// If the first character is a quote, read everything until the next quote (then
|
||||
// skip that quote), or until the end of the string.
|
||||
self.index += 1;
|
||||
while (true) : (self.index += 1) {
|
||||
char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
||||
switch (char) {
|
||||
0 => {
|
||||
return strategy.yieldArg(self);
|
||||
},
|
||||
'"' => {
|
||||
self.index += 1;
|
||||
return strategy.yieldArg(self);
|
||||
},
|
||||
else => {
|
||||
strategy.emitCharacter(self, char);
|
||||
},
|
||||
if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
|
||||
// Immediately complete the iterator.
|
||||
// The C runtime would return the name of the current executable here.
|
||||
return strategy.eof;
|
||||
}
|
||||
|
||||
var inside_quotes = false;
|
||||
while (true) : (self.index += 1) {
|
||||
const char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
||||
switch (char) {
|
||||
0 => {
|
||||
return strategy.yieldArg(self);
|
||||
},
|
||||
'"' => {
|
||||
inside_quotes = !inside_quotes;
|
||||
},
|
||||
' ', '\t' => {
|
||||
if (inside_quotes)
|
||||
strategy.emitCharacter(self, char)
|
||||
else {
|
||||
self.index += 1;
|
||||
return strategy.yieldArg(self);
|
||||
}
|
||||
}
|
||||
},
|
||||
else => {
|
||||
// Otherwise, read everything until the next space or ASCII control character
|
||||
// (not including DEL) (then skip that character), or until the end of the
|
||||
// string. This means that if the command-line string starts with one of these
|
||||
// characters, the first returned argument will be the empty string.
|
||||
while (true) : (self.index += 1) {
|
||||
char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
||||
switch (char) {
|
||||
0 => {
|
||||
return strategy.yieldArg(self);
|
||||
},
|
||||
'\x01'...' ' => {
|
||||
self.index += 1;
|
||||
return strategy.yieldArg(self);
|
||||
},
|
||||
else => {
|
||||
strategy.emitCharacter(self, char);
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
else => {
|
||||
strategy.emitCharacter(self, char);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -791,9 +815,10 @@ pub const ArgIteratorWindows = struct {
|
||||
//
|
||||
// - The end of the string always terminates the current argument.
|
||||
// - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
|
||||
// - 2n backslashes followed by a quote emit n backslashes. If in 'inside_quotes' and the
|
||||
// quote is immediately followed by a second quote, one quote is emitted and the other is
|
||||
// skipped, otherwise, the quote is skipped. Finally, 'inside_quotes' is toggled.
|
||||
// - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
|
||||
// If in 'inside_quotes' and the quote is immediately followed by a second quote,
|
||||
// one quote is emitted and the other is skipped, otherwise, the quote is skipped
|
||||
// and 'inside_quotes' is toggled.
|
||||
// - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
|
||||
// - n backslashes not followed by a quote emit n backslashes.
|
||||
var backslash_count: usize = 0;
|
||||
@ -826,8 +851,9 @@ pub const ArgIteratorWindows = struct {
|
||||
{
|
||||
strategy.emitCharacter(self, '"');
|
||||
self.index += 1;
|
||||
} else {
|
||||
inside_quotes = !inside_quotes;
|
||||
}
|
||||
inside_quotes = !inside_quotes;
|
||||
}
|
||||
},
|
||||
'\\' => {
|
||||
@ -1215,10 +1241,10 @@ test ArgIteratorWindows {
|
||||
// Separators
|
||||
try t("aa bb cc", &.{ "aa", "bb", "cc" });
|
||||
try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
|
||||
try t("aa\nbb\ncc", &.{ "aa", "bb\ncc" });
|
||||
try t("aa\r\nbb\r\ncc", &.{ "aa", "\nbb\r\ncc" });
|
||||
try t("aa\rbb\rcc", &.{ "aa", "bb\rcc" });
|
||||
try t("aa\x07bb\x07cc", &.{ "aa", "bb\x07cc" });
|
||||
try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
|
||||
try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
|
||||
try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
|
||||
try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
|
||||
try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
|
||||
try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});
|
||||
|
||||
@ -1227,22 +1253,22 @@ test ArgIteratorWindows {
|
||||
try t(" aa bb ", &.{ "", "aa", "bb" });
|
||||
try t("\t\t", &.{""});
|
||||
try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
|
||||
try t("\n\n", &.{ "", "\n" });
|
||||
try t("\n\naa\n\nbb\n\n", &.{ "", "\naa\n\nbb\n\n" });
|
||||
try t("\n\n", &.{"\n\n"});
|
||||
try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});
|
||||
|
||||
// Executable name with quotes/backslashes
|
||||
try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
|
||||
try t("\"", &.{""});
|
||||
try t("\"\"", &.{""});
|
||||
try t("\"\"\"", &.{ "", "" });
|
||||
try t("\"\"\"\"", &.{ "", "" });
|
||||
try t("\"\"\"\"\"", &.{ "", "\"" });
|
||||
try t("aa\"bb\"cc\"dd", &.{"aa\"bb\"cc\"dd"});
|
||||
try t("aa\"bb cc\"dd", &.{ "aa\"bb", "ccdd" });
|
||||
try t("\"aa\\\"bb\"", &.{ "aa\\", "bb" });
|
||||
try t("\"\"\"", &.{""});
|
||||
try t("\"\"\"\"", &.{""});
|
||||
try t("\"\"\"\"\"", &.{""});
|
||||
try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
|
||||
try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
|
||||
try t("\"aa\\\"bb\"", &.{"aa\\bb"});
|
||||
try t("\"aa\\\\\"", &.{"aa\\\\"});
|
||||
try t("aa\\\"bb", &.{"aa\\\"bb"});
|
||||
try t("aa\\\\\"bb", &.{"aa\\\\\"bb"});
|
||||
try t("aa\\\"bb", &.{"aa\\bb"});
|
||||
try t("aa\\\\\"bb", &.{"aa\\\\bb"});
|
||||
|
||||
// Arguments with quotes/backslashes
|
||||
try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
|
||||
@ -1252,29 +1278,66 @@ test ArgIteratorWindows {
|
||||
try t(". \"\"", &.{ ".", "" });
|
||||
try t(". \"\"\"", &.{ ".", "\"" });
|
||||
try t(". \"\"\"\"", &.{ ".", "\"" });
|
||||
try t(". \"\"\"\"\"", &.{ ".", "\"" });
|
||||
try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
|
||||
try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
|
||||
try t(". \" \"", &.{ ".", " " });
|
||||
try t(". \" \"\"", &.{ ".", " \"" });
|
||||
try t(". \" \"\"\"", &.{ ".", " \"" });
|
||||
try t(". \" \"\"\"\"", &.{ ".", " \"" });
|
||||
try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
||||
try t(". \\\"", &.{ ".", "\"" });
|
||||
try t(". \\\"\"", &.{ ".", "\"" });
|
||||
try t(". \\\"\"\"", &.{ ".", "\"" });
|
||||
try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
|
||||
try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
|
||||
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"" });
|
||||
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
|
||||
try t(". \" \\\"", &.{ ".", " \"" });
|
||||
try t(". \" \\\"\"", &.{ ".", " \"" });
|
||||
try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"" });
|
||||
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
||||
try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
||||
try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
|
||||
try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
|
||||
try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });
|
||||
|
||||
// From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
|
||||
try t(
|
||||
\\foo.exe "abc" d e
|
||||
, &.{ "foo.exe", "abc", "d", "e" });
|
||||
try t(
|
||||
\\foo.exe a\\b d"e f"g h
|
||||
, &.{ "foo.exe", "a\\\\b", "de fg", "h" });
|
||||
try t(
|
||||
\\foo.exe a\\\"b c d
|
||||
, &.{ "foo.exe", "a\\\"b", "c", "d" });
|
||||
try t(
|
||||
\\foo.exe a\\\\"b c" d e
|
||||
, &.{ "foo.exe", "a\\\\b c", "d", "e" });
|
||||
try t(
|
||||
\\foo.exe a"b"" c d
|
||||
, &.{ "foo.exe", "ab\" c d" });
|
||||
|
||||
// From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
|
||||
try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
|
||||
try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
|
||||
try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
|
||||
try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
|
||||
try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
|
||||
try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
|
||||
try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
|
||||
try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
|
||||
try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });
|
||||
|
||||
// Surrogate pair encoding of 𐐷 separated by quotes.
|
||||
// Encoded as WTF-16:
|
||||
// "<0xD801>"<0xDC37>
|
||||
// Encoded as WTF-8:
|
||||
// "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
|
||||
// During parsing, the quotes drop out and the surrogate pair
|
||||
// should end up encoded as its normal UTF-8 representation.
|
||||
try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
|
||||
}
|
||||
|
||||
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
|
||||
|
||||
@ -104,6 +104,9 @@
|
||||
.windows_spawn = .{
|
||||
.path = "windows_spawn",
|
||||
},
|
||||
.windows_argv = .{
|
||||
.path = "windows_argv",
|
||||
},
|
||||
.self_exe_symlink = .{
|
||||
.path = "self_exe_symlink",
|
||||
},
|
||||
|
||||
19
test/standalone/windows_argv/README.md
Normal file
19
test/standalone/windows_argv/README.md
Normal file
@ -0,0 +1,19 @@
|
||||
Tests that Zig's `std.process.ArgIteratorWindows` is compatible with both the MSVC and MinGW C runtimes' argv splitting algorithms.
|
||||
|
||||
The method of testing is:
|
||||
- Compile a C file with `wmain` as its entry point
|
||||
- The C `wmain` calls a Zig-implemented `verify` function that takes the `argv` from `wmain` and compares it to the argv gotten from `std.process.argsAlloc` (which takes `kernel32.GetCommandLineW()` and splits it)
|
||||
- The compiled C program is spawned continuously as a child process by the implementation in `fuzz.zig` with randomly generated command lines
|
||||
+ On Windows, the 'application name' and the 'command line' are disjoint concepts. That is, you can spawn `foo.exe` but set the command line to `bar.exe`, and `CreateProcessW` will spawn `foo.exe` but `argv[0]` will be `bar.exe`. This quirk allows us to test arbitrary `argv[0]` values as well which otherwise wouldn't be possible.
|
||||
|
||||
Note: This is intentionally testing against the C runtime argv splitting and *not* [`CommandLineToArgvW`](https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw), since the C runtime argv splitting was updated in 2008 but `CommandLineToArgvW` still uses the pre-2008 algorithm (which differs in both `argv[0]` rules and `""`; see [here](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESDOC) for details)
|
||||
|
||||
---
|
||||
|
||||
In addition to being run during `zig build test-standalone`, this test can be run on its own via `zig build test` from within this directory.
|
||||
|
||||
When run on its own:
|
||||
- `-Diterations=<num>` can be used to set the max fuzzing iterations, and `-Diterations=0` can be used to fuzz indefinitely
|
||||
- `-Dseed=<num>` can be used to set the PRNG seed for fuzz testing. If not provided, then the seed is chosen at random each time `build.zig` is run.
|
||||
|
||||
On failure, the number of iterations and the seed can be seen in the failing command, e.g. in `path\to\fuzz.exe path\to\verify-msvc.exe 100 2780392459403250529`, the iterations is `100` and the seed is `2780392459403250529`.
|
||||
100
test/standalone/windows_argv/build.zig
Normal file
100
test/standalone/windows_argv/build.zig
Normal file
@ -0,0 +1,100 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Registers the `test` step, which (on Windows hosts only) runs the fuzzer
/// against a `verify` executable linked with the MinGW C runtime and, when an
/// MSVC/Windows SDK installation can be found, against one linked with MSVC.
pub fn build(b: *std.Build) !void {
    const test_step = b.step("test", "Test it");
    b.default_step = test_step;

    // Nothing to test on non-Windows hosts; the step stays empty.
    if (builtin.os.tag != .windows) return;

    const optimize: std.builtin.OptimizeMode = .Debug;

    const verify_gnu = addVerifyExe(b, .gnu, "gnu", optimize);
    verify_gnu.mingw_unicode_entry_point = true;

    const fuzz = b.addExecutable(.{
        .name = "fuzz",
        .root_source_file = .{ .path = "fuzz.zig" },
        .target = b.host,
        .optimize = optimize,
    });

    const fuzz_max_iterations = b.option(u64, "iterations", "The max fuzz iterations (default: 100)") orelse 100;
    const fuzz_iterations_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_max_iterations}) catch @panic("oom");

    const fuzz_seed = b.option(u64, "seed", "Seed to use for the PRNG (default: random)") orelse random_seed: {
        var bytes: [8]u8 = undefined;
        try std.posix.getrandom(&bytes);
        break :random_seed std.mem.readInt(u64, &bytes, builtin.cpu.arch.endian());
    };
    const fuzz_seed_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_seed}) catch @panic("oom");

    const fuzz_args = [_][]const u8{ fuzz_iterations_arg, fuzz_seed_arg };

    const run_gnu = addFuzzRun(b, fuzz, "fuzz-gnu", verify_gnu, &fuzz_args);
    test_step.dependOn(&run_gnu.step);

    // Only target the MSVC ABI if MSVC/Windows SDK is available
    if (hasWindowsSdk(b.allocator)) {
        const verify_msvc = addVerifyExe(b, .msvc, "msvc", optimize);
        const run_msvc = addFuzzRun(b, fuzz, "fuzz-msvc", verify_msvc, &fuzz_args);
        test_step.dependOn(&run_msvc.step);
    }
}

/// Compiles `lib.zig` as a static library and `verify.c` as an executable,
/// both targeting `abi`, links them together with libc, and returns the
/// executable. Artifact names are "toargv-" / "verify-" followed by `suffix`.
fn addVerifyExe(
    b: *std.Build,
    abi: std.Target.Abi,
    comptime suffix: []const u8,
    optimize: std.builtin.OptimizeMode,
) *std.Build.Step.Compile {
    const target = b.resolveTargetQuery(.{
        .abi = abi,
    });
    const lib = b.addStaticLibrary(.{
        .name = "toargv-" ++ suffix,
        .root_source_file = .{ .path = "lib.zig" },
        .target = target,
        .optimize = optimize,
    });
    const exe = b.addExecutable(.{
        .name = "verify-" ++ suffix,
        .target = target,
        .optimize = optimize,
    });
    exe.addCSourceFile(.{
        .file = .{ .path = "verify.c" },
        .flags = &.{ "-DUNICODE", "-D_UNICODE" },
    });
    exe.linkLibrary(lib);
    exe.linkLibC();
    return exe;
}

/// Creates a run step named `name` that invokes `fuzz` with the path of
/// `verify_exe` followed by `args`, and expects it to exit with code 0.
fn addFuzzRun(
    b: *std.Build,
    fuzz: *std.Build.Step.Compile,
    name: []const u8,
    verify_exe: *std.Build.Step.Compile,
    args: []const []const u8,
) *std.Build.Step.Run {
    const run = b.addRunArtifact(fuzz);
    run.setName(name);
    run.addArtifactArg(verify_exe);
    run.addArgs(args);
    run.expectExitCode(0);
    return run;
}

/// Reports whether `std.zig.WindowsSdk.find` succeeds. Panics on OOM; any
/// other failure is treated as "not available".
fn hasWindowsSdk(allocator: std.mem.Allocator) bool {
    const sdk = std.zig.WindowsSdk.find(allocator) catch |err| switch (err) {
        error.OutOfMemory => @panic("oom"),
        else => return false,
    };
    defer sdk.free(allocator);
    return true;
}
|
||||
159
test/standalone/windows_argv/fuzz.zig
Normal file
159
test/standalone/windows_argv/fuzz.zig
Normal file
@ -0,0 +1,159 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const windows = std.os.windows;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// Fuzz driver.
///
/// Usage: `fuzz <verify-exe> [iterations] [seed]`
///
/// - `verify-exe`: path of the executable to spawn for every generated
///   command line (required).
/// - `iterations`: number of command lines to test; `0` or omitted means
///   run until the process is killed.
/// - `seed`: PRNG seed; when omitted a random seed is chosen and printed so
///   that the run can be reproduced.
///
/// Every command line for which the spawned process exits with a non-zero
/// code is printed. Returns `error.FoundDiscrepancies` if there was at least
/// one, and `error.MissingArgs` if `verify-exe` was not given.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len < 2) return error.MissingArgs;

    // `spawnVerify` needs the application path as NUL-terminated WTF-16.
    const verify_path_wtf8 = args[1];
    const verify_path_w = try std.unicode.wtf8ToWtf16LeAllocZ(allocator, verify_path_wtf8);
    defer allocator.free(verify_path_w);

    const iterations: u64 = iterations: {
        if (args.len < 3) break :iterations 0;
        break :iterations try std.fmt.parseUnsigned(u64, args[2], 10);
    };

    var rand_seed = false;
    const seed: u64 = seed: {
        if (args.len < 4) {
            rand_seed = true;
            var buf: [8]u8 = undefined;
            try std.posix.getrandom(&buf);
            break :seed std.mem.readInt(u64, &buf, builtin.cpu.arch.endian());
        }
        break :seed try std.fmt.parseUnsigned(u64, args[3], 10);
    };
    var random = std.rand.DefaultPrng.init(seed);
    const rand = random.random();

    // If the seed was not given via the CLI, then output the
    // randomly chosen seed so that this run can be reproduced
    if (rand_seed) {
        std.debug.print("rand seed: {}\n", .{seed});
    }

    var i: u64 = 0;
    var errors: u64 = 0;
    while (iterations == 0 or i < iterations) {
        const cmd_line_w = try randomCommandLineW(allocator, rand);
        defer allocator.free(cmd_line_w);

        // avoid known difference for 0-length command lines
        // (skipped command lines do not count as an iteration)
        if (cmd_line_w.len == 0 or cmd_line_w[0] == '\x00') continue;

        const exit_code = try spawnVerify(verify_path_w, cmd_line_w);
        if (exit_code != 0) {
            std.debug.print(">>> found discrepancy <<<\n", .{});
            const cmd_line_wtf8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, cmd_line_w);
            defer allocator.free(cmd_line_wtf8);
            std.debug.print("\"{}\"\n\n", .{std.zig.fmtEscapes(cmd_line_wtf8)});

            errors += 1;
        }

        i += 1;
    }
    if (errors > 0) {
        // we never get here if iterations is 0 so we don't have to worry about that case
        std.debug.print("found {} discrepancies in {} iterations\n", .{ errors, iterations });
        return error.FoundDiscrepancies;
    }
}
|
||||
|
||||
/// Generates a random command line of 0 to 256 little-endian UTF-16 code
/// units followed by a NUL terminator. Each code unit is drawn from one of
/// several equally likely categories (backslash, quote, space, tab, control
/// character, printable ASCII, non-ASCII). The result may contain embedded
/// NULs and unpaired surrogates.
///
/// Caller owns the returned slice and frees it with `allocator.free`.
fn randomCommandLineW(allocator: Allocator, rand: std.rand.Random) ![:0]const u16 {
    // The field order matters: `enumValue` picks a field by position.
    const Category = enum {
        backslash,
        quote,
        space,
        tab,
        control,
        printable,
        non_ascii,
    };

    const len = rand.uintAtMostBiased(u16, 256);
    const line = try allocator.allocSentinel(u16, len, 0);

    for (line) |*slot| {
        const unit: u16 = switch (rand.enumValue(Category)) {
            .backslash => '\\',
            .quote => '"',
            .space => ' ',
            .tab => '\t',
            .control => control: {
                // 0x00...0x20 map to themselves; the extra value 0x21
                // stands in for DEL.
                const c = rand.uintAtMostBiased(u8, 0x21);
                break :control if (c == 0x21) '\x7F' else c;
            },
            .printable => '!' + rand.uintAtMostBiased(u8, '~' - '!'),
            .non_ascii => rand.intRangeAtMostBiased(u16, 0x80, 0xFFFF),
        };
        slot.* = std.mem.nativeToLittle(u16, unit);
    }

    return line;
}
|
||||
|
||||
/// Spawns `verify_path` via `CreateProcessW` with `cmd_line` passed as the
/// child's command line, waits for the child to exit, and returns its exit
/// code.
///
/// The child is given null stdin/stdout handles and this process's stderr
/// handle (or null if that cannot be obtained).
fn spawnVerify(verify_path: [:0]const u16, cmd_line: [:0]const u16) !windows.DWORD {
    var startup_info: windows.STARTUPINFOW = .{
        .cb = @sizeOf(windows.STARTUPINFOW),
        .lpReserved = null,
        .lpDesktop = null,
        .lpTitle = null,
        .dwX = 0,
        .dwY = 0,
        .dwXSize = 0,
        .dwYSize = 0,
        .dwXCountChars = 0,
        .dwYCountChars = 0,
        .dwFillAttribute = 0,
        .dwFlags = windows.STARTF_USESTDHANDLES,
        .wShowWindow = 0,
        .cbReserved2 = 0,
        .lpReserved2 = null,
        .hStdInput = null,
        .hStdOutput = null,
        .hStdError = windows.GetStdHandle(windows.STD_ERROR_HANDLE) catch null,
    };
    var proc_info: windows.PROCESS_INFORMATION = undefined;

    // The application name and the command line are passed separately, so
    // `cmd_line` does not have to mention `verify_path` at all.
    try windows.CreateProcessW(
        @constCast(verify_path.ptr),
        @constCast(cmd_line.ptr),
        null,
        null,
        windows.TRUE,
        0,
        null,
        null,
        &startup_info,
        &proc_info,
    );
    // Only the process handle is needed from here on.
    windows.CloseHandle(proc_info.hThread);

    const child = proc_info.hProcess;
    defer windows.CloseHandle(child);

    try windows.WaitForSingleObjectEx(child, windows.INFINITE, false);

    var exit_code: windows.DWORD = undefined;
    const got_exit_code = windows.kernel32.GetExitCodeProcess(child, &exit_code);
    if (got_exit_code == 0) return error.UnableToGetExitCode;
    return exit_code;
}
|
||||
8
test/standalone/windows_argv/lib.h
Normal file
8
test/standalone/windows_argv/lib.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef _LIB_H_
|
||||
#define _LIB_H_
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
int verify(int argc, wchar_t *argv[]);
|
||||
|
||||
#endif
|
||||
59
test/standalone/windows_argv/lib.zig
Normal file
59
test/standalone/windows_argv/lib.zig
Normal file
@ -0,0 +1,59 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// C-callable entry point: checks the `argc`/`argv` produced by the C
/// runtime against the arguments Zig's standard library computes for the
/// current process.
///
/// Returns 1 on success, 0 on failure. Panics if memory runs out or if the
/// size needed to hold the arguments would overflow `usize`.
export fn verify(argc: c_int, argv: [*]const [*:0]const u16) c_int {
    const arg_count: usize = @intCast(argc);
    if (testArgv(argv[0..arg_count])) |_| {
        return 1;
    } else |err| switch (err) {
        error.OutOfMemory => @panic("oom"),
        error.Overflow => @panic("bytes needed to contain args would overflow usize"),
        error.ArgvMismatch => return 0,
    }
}
|
||||
|
||||
/// Compares `expected_args` (NUL-terminated WTF-16 strings) with the result
/// of `std.process.argsAlloc` for the current process.
///
/// On any difference in count or content, prints the differing arguments,
/// the process command line read from the PEB, and the full expected argv
/// to stderr, then returns `error.ArgvMismatch`.
fn testArgv(expected_args: []const [*:0]const u16) !void {
    // Everything allocated here is scratch data released on return.
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const actual_args = try std.process.argsAlloc(arena);
    // Reused for every WTF-16 -> WTF-8 conversion below.
    var scratch = std.ArrayList(u8).init(arena);

    const common_len = @min(expected_args.len, actual_args.len);
    var matches = actual_args.len == expected_args.len;

    // Compare the arguments present on both sides.
    for (0..common_len) |i| {
        const actual = actual_args[i];
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected_args[i]));
        if (std.mem.eql(u8, scratch.items, actual)) continue;

        std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(scratch.items) });
        std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(actual) });
        matches = false;
    }
    if (matches) return;

    // Report the arguments that exist on only one side.
    var expected_idx = common_len;
    while (expected_idx < expected_args.len) : (expected_idx += 1) {
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected_args[expected_idx]));
        std.debug.print("{}: expected: \"{}\"\n", .{ expected_idx, std.zig.fmtEscapes(scratch.items) });
    }
    var actual_idx = common_len;
    while (actual_idx < actual_args.len) : (actual_idx += 1) {
        std.debug.print("{}: actual: \"{}\"\n", .{ actual_idx, std.zig.fmtEscapes(actual_args[actual_idx]) });
    }

    // Dump the command line exactly as the process received it.
    const peb = std.os.windows.peb();
    const cmd_line_ptr: [*:0]u16 = @ptrCast(peb.ProcessParameters.CommandLine.Buffer);
    scratch.clearRetainingCapacity();
    try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(cmd_line_ptr));
    std.debug.print("command line: \"{}\"\n", .{std.zig.fmtEscapes(scratch.items)});

    // Print the expected argv in the form of a Zig slice literal.
    std.debug.print("expected argv:\n", .{});
    std.debug.print("&.{{\n", .{});
    for (expected_args) |expected| {
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected));
        std.debug.print(" \"{}\",\n", .{std.zig.fmtEscapes(scratch.items)});
    }
    std.debug.print("}}\n", .{});
    return error.ArgvMismatch;
}
|
||||
7
test/standalone/windows_argv/verify.c
Normal file
7
test/standalone/windows_argv/verify.c
Normal file
@ -0,0 +1,7 @@
|
||||
#include <windows.h>
|
||||
#include "lib.h"
|
||||
|
||||
/* Wide-character entry point: hands the C runtime's argv to verify() and
 * exits with 0 when verify() returns non-zero, 1 otherwise. */
int wmain(int argc, wchar_t *argv[]) {
    return verify(argc, argv) ? 0 : 1;
}
|
||||
Loading…
x
Reference in New Issue
Block a user