From 21e195a1a96100cd5bcd47b6d9565b2141ad13e1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 18 Oct 2025 07:38:10 -0700 Subject: [PATCH] std: move some windows path checking logic --- lib/std/fs/Dir.zig | 2 +- lib/std/os/windows.zig | 9 +++++++++ lib/std/posix.zig | 4 ++-- lib/std/unicode.zig | 26 -------------------------- 4 files changed, 12 insertions(+), 29 deletions(-) diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index 5187ab69a8..c4cbaf7196 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -1575,7 +1575,7 @@ pub fn symLink( // when converting to an NT namespaced path. CreateSymbolicLink in // symLinkW will handle the necessary conversion. var target_path_w: windows.PathSpace = undefined; - try std.unicode.checkWtf8ToWtf16LeOverflow(target_path, &target_path_w.data); + try windows.checkWtf8ToWtf16LeOverflow(target_path, &target_path_w.data); target_path_w.len = try std.unicode.wtf8ToWtf16Le(&target_path_w.data, target_path); target_path_w.data[target_path_w.len] = 0; // However, we need to canonicalize any path separators to `\`, since if diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index b4780ed203..1d49a890e4 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -5739,3 +5739,12 @@ pub fn ProcessBaseAddress(handle: HANDLE) ProcessBaseAddressError!HMODULE { const ppeb: *const PEB = @ptrCast(@alignCast(peb_out.ptr)); return ppeb.ImageBaseAddress; } + +pub fn checkWtf8ToWtf16LeOverflow(wtf8: []const u8, wtf16le: []const u16) error{ BadPathName, NameTooLong }!void { + // Each u8 in UTF-8/WTF-8 correlates to at most one u16 in UTF-16LE/WTF-16LE. + if (wtf16le.len >= wtf8.len) return; + const utf16_len = std.unicode.calcUtf16LeLenImpl(wtf8, .can_encode_surrogate_half) catch + return error.BadPathName; + if (utf16_len > wtf16le.len) + return error.NameTooLong; +} diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 697ba1a59a..a58204e1dc 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -2918,7 +2918,7 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void { @compileError("WASI does not support os.chdir"); } else if (native_os == .windows) { var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; - try std.unicode.checkWtf8ToWtf16LeOverflow(dir_path, &wtf16_dir_path); + try windows.checkWtf8ToWtf16LeOverflow(dir_path, &wtf16_dir_path); const len = try std.unicode.wtf8ToWtf16Le(&wtf16_dir_path, dir_path); return chdirW(wtf16_dir_path[0..len]); } else { @@ -2935,7 +2935,7 @@ pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void { if (native_os == .windows) { const dir_path_span = mem.span(dir_path); var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; - try std.unicode.checkWtf8ToWtf16LeOverflow(dir_path_span, &wtf16_dir_path); + try windows.checkWtf8ToWtf16LeOverflow(dir_path_span, &wtf16_dir_path); const len = try std.unicode.wtf8ToWtf16Le(&wtf16_dir_path, dir_path_span); return chdirW(wtf16_dir_path[0..len]); } else if (native_os == .wasi and !builtin.link_libc) { diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 2a6dca0d8a..1aae6d488f 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -1809,31 +1809,6 @@ pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) error{InvalidWtf8}!usize return utf8ToUtf16LeImpl(wtf16le, wtf8, .can_encode_surrogate_half); } -fn checkUtf8ToUtf16LeOverflowImpl(utf8: []const u8, utf16le: []const u16, comptime surrogates: Surrogates) !void { - // Each u8 in UTF-8/WTF-8 correlates to at most one u16 in UTF-16LE/WTF-16LE. - if (utf16le.len >= utf8.len) return; - const utf16_len = calcUtf16LeLenImpl(utf8, surrogates) catch { - return switch (surrogates) { - .cannot_encode_surrogate_half => error.InvalidUtf8, - .can_encode_surrogate_half => error.InvalidWtf8, - }; - }; - if (utf16_len > utf16le.len) - return error.NameTooLong; -} - -/// Checks if calling `utf8ToUtf16Le` would overflow. Might fail if utf8 is not -/// valid UTF-8. -pub fn checkUtf8ToUtf16LeOverflow(utf8: []const u8, utf16le: []const u16) error{ InvalidUtf8, NameTooLong }!void { - return checkUtf8ToUtf16LeOverflowImpl(utf8, utf16le, .cannot_encode_surrogate_half); -} - -/// Checks if calling `utf8ToUtf16Le` would overflow. Might fail if wtf8 is not -/// valid WTF-8. -pub fn checkWtf8ToWtf16LeOverflow(wtf8: []const u8, wtf16le: []const u16) error{ InvalidWtf8, NameTooLong }!void { - return checkUtf8ToUtf16LeOverflowImpl(wtf8, wtf16le, .can_encode_surrogate_half); -} - /// Surrogate codepoints (U+D800 to U+DFFF) are replaced by the Unicode replacement /// character (U+FFFD). /// All surrogate codepoints and the replacement character are encoded as three @@ -2040,7 +2015,6 @@ fn testRoundtripWtf8(wtf8: []const u8) !void { var wtf16_buf: [32]u16 = undefined; const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, wtf8); try testing.expectEqual(wtf16_len, calcWtf16LeLen(wtf8)); - try checkWtf8ToWtf16LeOverflow(wtf8, &wtf16_buf); const wtf16 = wtf16_buf[0..wtf16_len]; var roundtripped_buf: [32]u8 = undefined;