From 10914dc31000bc5c3b7164a4101808719d3da07f Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 13 Jul 2024 15:03:55 -0700 Subject: [PATCH 1/4] ArgIteratorWindows: Clarify buffer length comment --- lib/std/process.zig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/std/process.zig b/lib/std/process.zig index d40753f19d..542376f580 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -683,10 +683,12 @@ pub const ArgIteratorWindows = struct { const wtf8_len = unicode.calcWtf8Len(cmd_line); // This buffer must be large enough to contain contiguous NUL-terminated slices - // of each argument. For arguments past the first one, space for the NUL-terminator - // is guaranteed due to the necessary whitespace between arugments. However, we need - // one extra byte to guarantee enough room for the NUL terminator if the command line - // ends up being exactly 1 argument long with no quotes, etc. + // of each argument. + // - During parsing, the length of a parsed argument will always be equal to + // to less than its unparsed length + // - The first argument needs one extra byte of space allocated for its NUL + // terminator, but for each subsequent argument the necessary whitespace + // between arguments guarantees room for their NUL terminator(s). const buffer = try allocator.alloc(u8, wtf8_len + 1); errdefer allocator.free(buffer); From 1418c8a5d49ecc38a20df07151a7fe942992aea4 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 13 Jul 2024 16:54:00 -0700 Subject: [PATCH 2/4] ArgIteratorWindows: Store last emitted code unit instead of checking the last 6 emitted bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, to ensure args were encoded as well-formed WTF-8 (i.e. no encoded surrogate pairs), the code unit would be encoded and then the last 6 emitted bytes would be checked to see if they were a surrogate pair, and this was done for any emitted code unit (although this was not necessary, it should have only been done when emitting a low surrogate). After this commit, we still want to ensure well-formed WTF-8, but, to do so, the last emitted code point is stored, meaning we can just directly check that the last code unit is a high surrogate and the current code unit is a low surrogate to determine if we have a surrogate pair. This provides some performance benefit over and above a "use the same strategy as before but only check when we're emitting a low surrogate" implementation: Benchmark 1 (111 runs): benchargv-master.exe measurement mean ± σ min … max outliers delta wall_time 45.2ms ± 532us 44.5ms … 49.4ms 2 ( 2%) 0% peak_rss 6.49MB ± 3.94KB 6.46MB … 6.49MB 10 ( 9%) 0% Benchmark 2 (154 runs): benchargv-storelast.exe measurement mean ± σ min … max outliers delta wall_time 32.6ms ± 293us 32.2ms … 34.2ms 8 ( 5%) ⚡- 27.8% ± 0.2% peak_rss 6.49MB ± 5.15KB 6.46MB … 6.49MB 15 (10%) - 0.0% ± 0.0% Benchmark 3 (131 runs): benchargv-onlylow.exe measurement mean ± σ min … max outliers delta wall_time 38.4ms ± 257us 37.9ms … 39.6ms 5 ( 4%) ⚡- 15.1% ± 0.2% peak_rss 6.49MB ± 5.70KB 6.46MB … 6.49MB 9 ( 7%) - 0.0% ± 0.0% --- lib/std/process.zig | 92 ++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/lib/std/process.zig b/lib/std/process.zig index 542376f580..be8576442d 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -717,14 +717,20 @@ pub const ArgIteratorWindows = struct { const eof = null; - fn emitBackslashes(self: *ArgIteratorWindows, count: usize) void { - for (0..count) |_| emitCharacter(self, '\\'); + /// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`. + fn emitBackslashes(self: *ArgIteratorWindows, count: usize, last_emitted_code_unit: ?u16) ?u16 { + for (0..count) |_| { + self.buffer[self.end] = '\\'; + self.end += 1; + } + return if (count != 0) '\\' else last_emitted_code_unit; } - fn emitCharacter(self: *ArgIteratorWindows, code_unit: u16) void { - const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable; - self.end += wtf8_len; - + /// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then + /// the previously emitted high surrogate is overwritten by the codepoint encoded + /// by the surrogate pair, and `null` is returned. + /// Otherwise, `code_unit` is emitted and returned. + fn emitCharacter(self: *ArgIteratorWindows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 { // Because we are emitting WTF-8, we need to // check to see if we've emitted two consecutive surrogate // codepoints that form a valid surrogate pair in order @@ -745,28 +751,24 @@ pub const ArgIteratorWindows = struct { // and emit the codepoint it encodes, which in this // example is U+10437 (𐐷), which is encoded in UTF-8 as: // <0xF0><0x90><0x90><0xB7> - concatSurrogatePair(self); - } + if (last_emitted_code_unit != null and + std.unicode.utf16IsLowSurrogate(code_unit) and + std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?)) + { + const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable; - fn concatSurrogatePair(self: *ArgIteratorWindows) void { - // Surrogate codepoints are always encoded as 3 bytes, so there - // must be 6 bytes for a surrogate pair to exist. - if (self.end - self.start >= 6) { - const window = self.buffer[self.end - 6 .. self.end]; - const view = unicode.Wtf8View.init(window) catch return; - var it = view.iterator(); - var pair: [2]u16 = undefined; - pair[0] = std.mem.nativeToLittle(u16, std.math.cast(u16, it.nextCodepoint().?) orelse return); - if (!unicode.utf16IsHighSurrogate(std.mem.littleToNative(u16, pair[0]))) return; - pair[1] = std.mem.nativeToLittle(u16, std.math.cast(u16, it.nextCodepoint().?) orelse return); - if (!unicode.utf16IsLowSurrogate(std.mem.littleToNative(u16, pair[1]))) return; - // We know we have a valid surrogate pair, so convert - // it to UTF-8, overwriting the surrogate pair's bytes - // and then chop off the extra bytes. - const len = unicode.utf16LeToUtf8(window, &pair) catch unreachable; - const delta = 6 - len; - self.end -= delta; + // Unpaired surrogate is 3 bytes long + const dest = self.buffer[self.end - 3 ..]; + const len = unicode.utf8Encode(codepoint, dest) catch unreachable; + // All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes + assert(len == 4); + self.end += 1; + return null; } + + const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable; + self.end += wtf8_len; + return code_unit; } fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 { @@ -783,9 +785,13 @@ pub const ArgIteratorWindows = struct { const eof = false; - fn emitBackslashes(_: *ArgIteratorWindows, _: usize) void {} + fn emitBackslashes(_: *ArgIteratorWindows, _: usize, last_emitted_code_unit: ?u16) ?u16 { + return last_emitted_code_unit; + } - fn emitCharacter(_: *ArgIteratorWindows, _: u16) void {} + fn emitCharacter(_: *ArgIteratorWindows, _: u16, last_emitted_code_unit: ?u16) ?u16 { + return last_emitted_code_unit; + } fn yieldArg(_: *ArgIteratorWindows) bool { return true; @@ -793,6 +799,7 @@ pub const ArgIteratorWindows = struct { }; fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T { + var last_emitted_code_unit: ?u16 = null; // The first argument (the executable name) uses different parsing rules. if (self.index == 0) { if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) { @@ -815,15 +822,15 @@ pub const ArgIteratorWindows = struct { inside_quotes = !inside_quotes; }, ' ', '\t' => { - if (inside_quotes) - strategy.emitCharacter(self, char) - else { + if (inside_quotes) { + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + } else { self.index += 1; return strategy.yieldArg(self); } }, else => { - strategy.emitCharacter(self, char); + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); }, } } @@ -861,29 +868,28 @@ pub const ArgIteratorWindows = struct { 0; switch (char) { 0 => { - strategy.emitBackslashes(self, backslash_count); + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); return strategy.yieldArg(self); }, ' ', '\t' => { - strategy.emitBackslashes(self, backslash_count); + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); backslash_count = 0; - if (inside_quotes) - strategy.emitCharacter(self, char) - else - return strategy.yieldArg(self); + if (inside_quotes) { + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + } else return strategy.yieldArg(self); }, '"' => { const char_is_escaped_quote = backslash_count % 2 != 0; - strategy.emitBackslashes(self, backslash_count / 2); + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit); backslash_count = 0; if (char_is_escaped_quote) { - strategy.emitCharacter(self, '"'); + last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); } else { if (inside_quotes and self.index + 1 != self.cmd_line.len and mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"') { - strategy.emitCharacter(self, '"'); + last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); self.index += 1; } else { inside_quotes = !inside_quotes; @@ -894,9 +900,9 @@ pub const ArgIteratorWindows = struct { backslash_count += 1; }, else => { - strategy.emitBackslashes(self, backslash_count); + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); backslash_count = 0; - strategy.emitCharacter(self, char); + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); }, } } From 1a62cfffa79917d930b77f2598b1bfc93efeede5 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 13 Jul 2024 17:25:06 -0700 Subject: [PATCH 3/4] Replace GetCommandLineW with PEB access, delete GetCommandLine bindings --- lib/std/os/windows/kernel32.zig | 3 --- lib/std/process.zig | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 3d72b6f1a2..d7c9625bda 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -161,9 +161,6 @@ pub extern "kernel32" fn FormatMessageW(dwFlags: DWORD, lpSource: ?LPVOID, dwMes pub extern "kernel32" fn FreeEnvironmentStringsW(penv: [*:0]u16) callconv(WINAPI) BOOL; -pub extern "kernel32" fn GetCommandLineA() callconv(WINAPI) LPSTR; -pub extern "kernel32" fn GetCommandLineW() callconv(WINAPI) LPWSTR; - pub extern "kernel32" fn GetConsoleMode(in_hConsoleHandle: HANDLE, out_lpMode: *DWORD) callconv(WINAPI) BOOL; pub extern "kernel32" fn SetConsoleMode(in_hConsoleHandle: HANDLE, in_dwMode: DWORD) callconv(WINAPI) BOOL; diff --git a/lib/std/process.zig b/lib/std/process.zig index be8576442d..434e0fbbcc 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -1150,8 +1150,9 @@ pub const ArgIterator = struct { return ArgIterator{ .inner = try InnerType.init(allocator) }; } if (native_os == .windows) { - const cmd_line_w = windows.kernel32.GetCommandLineW(); - return ArgIterator{ .inner = try InnerType.init(allocator, cmd_line_w) }; + const cmd_line = std.os.windows.peb().ProcessParameters.CommandLine; + const cmd_line_w = cmd_line.Buffer.?[0 .. cmd_line.Length / 2 :0]; + return ArgIterator{ .inner = try InnerType.init(allocator, cmd_line_w.ptr) }; } return ArgIterator{ .inner = InnerType.init() }; From d48251d0f0d2de2f618afe65db3ba425b332d5c4 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 13 Jul 2024 17:29:18 -0700 Subject: [PATCH 4/4] ArgIteratorWindows.init: Take `[]const u16` slice instead of multi-item pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we use the PEB to get the precise length of the command line string, there's no need for a multi-item pointer/sliceTo call. This provides a minor speedup: Benchmark 1 (153 runs): benchargv-before.exe measurement mean ± σ min … max outliers delta wall_time 32.7ms ± 429us 32.1ms … 36.9ms 1 ( 1%) 0% peak_rss 6.49MB ± 5.62KB 6.46MB … 6.49MB 14 ( 9%) 0% Benchmark 2 (157 runs): benchargv-after.exe measurement mean ± σ min … max outliers delta wall_time 31.9ms ± 236us 31.4ms … 32.7ms 4 ( 3%) ⚡- 2.4% ± 0.2% peak_rss 6.49MB ± 4.77KB 6.46MB … 6.49MB 14 ( 9%) + 0.0% ± 0.0% --- lib/std/process.zig | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/std/process.zig b/lib/std/process.zig index 434e0fbbcc..f4b24a8013 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -664,7 +664,7 @@ pub const ArgIteratorWasi = struct { pub const ArgIteratorWindows = struct { allocator: Allocator, /// Encoded as WTF-16 LE. - cmd_line: [:0]const u16, + cmd_line: []const u16, index: usize = 0, /// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices /// of each argument encoded as WTF-8. @@ -678,9 +678,8 @@ pub const ArgIteratorWindows = struct { /// /// The iterator stores and uses `cmd_line_w`, so its memory must be valid for /// at least as long as the returned ArgIteratorWindows. - pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows { - const cmd_line = mem.sliceTo(cmd_line_w, 0); - const wtf8_len = unicode.calcWtf8Len(cmd_line); + pub fn init(allocator: Allocator, cmd_line_w: []const u16) InitError!ArgIteratorWindows { + const wtf8_len = unicode.calcWtf8Len(cmd_line_w); // This buffer must be large enough to contain contiguous NUL-terminated slices // of each argument. @@ -694,7 +693,7 @@ pub const ArgIteratorWindows = struct { return .{ .allocator = allocator, - .cmd_line = cmd_line, + .cmd_line = cmd_line_w, .buffer = buffer, }; } @@ -1151,8 +1150,8 @@ pub const ArgIterator = struct { } if (native_os == .windows) { const cmd_line = std.os.windows.peb().ProcessParameters.CommandLine; - const cmd_line_w = cmd_line.Buffer.?[0 .. cmd_line.Length / 2 :0]; - return ArgIterator{ .inner = try InnerType.init(allocator, cmd_line_w.ptr) }; + const cmd_line_w = cmd_line.Buffer.?[0 .. cmd_line.Length / 2]; + return ArgIterator{ .inner = try InnerType.init(allocator, cmd_line_w) }; } return ArgIterator{ .inner = InnerType.init() };