From 33fdc43714e34f5c4bc02c416ef4c6534acc56ca Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 13 Oct 2022 22:18:35 -0700 Subject: [PATCH 1/6] std.fs: Add MAX_NAME_BYTES Also add some NAME_MAX or equivalent definitions where necessary --- lib/std/c/darwin.zig | 1 + lib/std/c/dragonfly.zig | 1 + lib/std/c/haiku.zig | 2 ++ lib/std/fs.zig | 17 +++++++++++++++++ lib/std/os/windows.zig | 6 ++++++ 5 files changed, 27 insertions(+) diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 0dfe7a2500..17a4957e94 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -1014,6 +1014,7 @@ pub const vm_machine_attribute_val_t = isize; pub const CALENDAR_CLOCK = 1; pub const PATH_MAX = 1024; +pub const NAME_MAX = 255; pub const IOV_MAX = 16; pub const STDIN_FILENO = 0; diff --git a/lib/std/c/dragonfly.zig b/lib/std/c/dragonfly.zig index 35436d7017..ddc0db2709 100644 --- a/lib/std/c/dragonfly.zig +++ b/lib/std/c/dragonfly.zig @@ -234,6 +234,7 @@ pub const SA = struct { }; pub const PATH_MAX = 1024; +pub const NAME_MAX = 255; pub const IOV_MAX = KERN.IOV_MAX; pub const ino_t = c_ulong; diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index ba7e55ccb1..300dc9076c 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -266,6 +266,7 @@ pub const area_info = extern struct { }; pub const MAXPATHLEN = PATH_MAX; +pub const MAXNAMLEN = NAME_MAX; pub const image_info = extern struct { id: u32, @@ -371,6 +372,7 @@ pub const KERN = struct {}; pub const IOV_MAX = 1024; pub const PATH_MAX = 1024; +pub const NAME_MAX = 256; pub const STDIN_FILENO = 0; pub const STDOUT_FILENO = 1; diff --git a/lib/std/fs.zig b/lib/std/fs.zig index dfadb144eb..208f456184 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -48,6 +48,23 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { @compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)), }; +/// This represents the maximum size of a UTF-8 encoded file name component that the +/// operating system will 
accept. All file name components returned by file system +/// operations are assumed to fit into a UTF-8 encoded array of this length. +/// The byte count does not include a null sentinel byte. +pub const MAX_NAME_BYTES = switch (builtin.os.tag) { + .linux, .macos, .ios, .freebsd, .dragonfly, .haiku => os.NAME_MAX, + .netbsd, .openbsd, .solaris => os.MAXNAMLEN, + // Each UTF-16LE character may be expanded to 3 UTF-8 bytes. + // If it would require 4 UTF-8 bytes, then there would be a surrogate + // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. + .windows => os.NAME_MAX * 3, + else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) + root.os.NAME_MAX + else + @compileError("NAME_MAX not implemented for " ++ @tagName(builtin.os.tag)), +}; + pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*; /// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index d68a66ed87..7bed48a2bf 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2977,6 +2977,12 @@ pub const PMEMORY_BASIC_INFORMATION = *MEMORY_BASIC_INFORMATION; /// from https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file#maximum-path-length-limitation pub const PATH_MAX_WIDE = 32767; +/// > [Each file name component can be] up to the value returned in the +/// > lpMaximumComponentLength parameter of the GetVolumeInformation function +/// > (this value is commonly 255 characters) +/// from https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation +pub const NAME_MAX = 255; + pub const FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100; pub const FORMAT_MESSAGE_ARGUMENT_ARRAY = 0x00002000; pub const FORMAT_MESSAGE_FROM_HMODULE = 0x00000800; From c6ff1a71601f42dab5c8607397b28b34085134d8 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 13 Oct 2022 21:21:16 -0700 
Subject: [PATCH 2/6] Windows: Fix iterator name buffer size not handling all possible file name components Each u16 within a file name component can be encoded as up to 3 UTF-8 bytes, so we need to use MAX_NAME_BYTES to account for all possible UTF-8 encoded names. Fixes #8268 --- lib/std/fs.zig | 4 ++-- lib/std/fs/test.zig | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 208f456184..8f9e3767e4 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -58,7 +58,7 @@ pub const MAX_NAME_BYTES = switch (builtin.os.tag) { // Each UTF-16LE character may be expanded to 3 UTF-8 bytes. // If it would require 4 UTF-8 bytes, then there would be a surrogate // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. - .windows => os.NAME_MAX * 3, + .windows => os.windows.NAME_MAX * 3, else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) root.os.NAME_MAX else @@ -697,7 +697,7 @@ pub const IterableDir = struct { index: usize, end_index: usize, first_iter: bool, - name_data: [256]u8, + name_data: [MAX_NAME_BYTES]u8, const Self = @This(); diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 028110ff9c..4f06e64e7a 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -703,6 +703,44 @@ test "makePath in a directory that no longer exists" { try testing.expectError(error.FileNotFound, tmp.dir.makePath("sub-path")); } +fn testFilenameLimits(iterable_dir: IterableDir, maxed_filename: []const u8) !void { + // setup, create a dir and a nested file both with maxed filenames, and walk the dir + { + var maxed_dir = try iterable_dir.dir.makeOpenPath(maxed_filename, .{}); + defer maxed_dir.close(); + + try maxed_dir.writeFile(maxed_filename, ""); + + var walker = try iterable_dir.walk(testing.allocator); + defer walker.deinit(); + + var count: usize = 0; + while (try walker.next()) |entry| { + try testing.expectEqualStrings(maxed_filename, entry.basename); + 
count += 1; + } + try testing.expectEqual(@as(usize, 2), count); + } + + // ensure that we can delete the tree + try iterable_dir.dir.deleteTree(maxed_filename); +} + +test "filename limits" { + var tmp = tmpIterableDir(.{}); + defer tmp.cleanup(); + + if (builtin.os.tag == .windows) { + // € is encoded as a single u16 in UTF-16 but as 3 bytes in UTF-8 (the most any single u16 can need), + // so Windows allows for NAME_MAX of them + const maxed_windows_filename = ("€".*) ** std.os.windows.NAME_MAX; + try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename); + } else { + const maxed_ascii_filename = [_]u8{'1'} ** std.fs.MAX_NAME_BYTES; + try testFilenameLimits(tmp.iterable_dir, &maxed_ascii_filename); + } +} + test "writev, readv" { var tmp = tmpDir(.{}); defer tmp.cleanup(); From dd0962d5ea62742de50c62065856580d340e6cab Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Thu, 13 Oct 2022 23:24:23 -0700 Subject: [PATCH 3/6] Add wasi branch to MAX_NAME_BYTES --- lib/std/fs.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 8f9e3767e4..8fedfd75b3 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -59,6 +59,8 @@ pub const MAX_NAME_BYTES = switch (builtin.os.tag) { // If it would require 4 UTF-8 bytes, then there would be a surrogate // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. .windows => os.windows.NAME_MAX * 3, + // TODO work out what a reasonable value we should use here + .wasi => 1024, else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) root.os.NAME_MAX else From c5d23161fcadf06089656b1f1d0fb725d7217b74 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 14 Oct 2022 00:23:00 -0700 Subject: [PATCH 4/6] Set wasi MAX_NAME_BYTES to minimum of the rest of the supported platforms This is a slightly weird situation, because the 'real' value may depend on the host platform that the WASI is being executed on. 
--- lib/std/fs.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 8fedfd75b3..d14f29f841 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -60,7 +60,7 @@ pub const MAX_NAME_BYTES = switch (builtin.os.tag) { // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. .windows => os.windows.NAME_MAX * 3, // TODO work out what a reasonable value we should use here - .wasi => 1024, + .wasi => 255, else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) root.os.NAME_MAX else From 348f73502e81621b7cdb8ecf9a64bc8ebcf9db97 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 14 Oct 2022 04:39:56 -0700 Subject: [PATCH 5/6] Make MAX_NAME_BYTES on WASI equivalent to the max of the other platforms Make the test use the minimum length and set MAX_NAME_BYTES to the maximum so that: - the test will work on any host platform - *and* the MAX_NAME_BYTES will be able to hold the max file name component on any host platform --- lib/std/fs.zig | 6 ++++-- lib/std/fs/test.zig | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index d14f29f841..c6b992a162 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -59,8 +59,10 @@ pub const MAX_NAME_BYTES = switch (builtin.os.tag) { // If it would require 4 UTF-8 bytes, then there would be a surrogate // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. .windows => os.windows.NAME_MAX * 3, - // TODO work out what a reasonable value we should use here - .wasi => 255, + // For WASI, the MAX_NAME will depend on the host OS, so it needs to be + // as large as the largest MAX_NAME_BYTES in order to work on any host OS. 
+ // TODO determine if this is a reasonable approach + .wasi => os.windows.NAME_MAX * 3, else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) root.os.NAME_MAX else diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 4f06e64e7a..f7159a835c 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -735,6 +735,11 @@ test "filename limits" { // so Windows allows for NAME_MAX of them const maxed_windows_filename = ("€".*) ** std.os.windows.NAME_MAX; try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename); + } else if (builtin.os.tag == .wasi) { + // On WASI, the maxed filename depends on the host OS, so in order for this test to + // work on any host, we need to use a length that will work for all platforms. + const maxed_wasi_filename = [_]u8{'1'} ** 255; + try testFilenameLimits(tmp.iterable_dir, &maxed_wasi_filename); } else { const maxed_ascii_filename = [_]u8{'1'} ** std.fs.MAX_NAME_BYTES; try testFilenameLimits(tmp.iterable_dir, &maxed_ascii_filename); From db80225a973049f77f0a3080aae4bd2ea6084bc4 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 28 Oct 2022 01:58:47 -0700 Subject: [PATCH 6/6] fs: Some NAME_MAX/MAX_NAME_BYTES improvements --- lib/std/c/haiku.zig | 4 +++- lib/std/fs.zig | 13 ++++++++----- lib/std/fs/test.zig | 5 +++-- lib/std/os/windows.zig | 12 ++++++++++++ 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index 300dc9076c..30ab3542b9 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -372,7 +372,9 @@ pub const KERN = struct {}; pub const IOV_MAX = 1024; pub const PATH_MAX = 1024; -pub const NAME_MAX = 256; +/// NOTE: Contains room for the terminating null character (despite the POSIX +/// definition saying that NAME_MAX does not include the terminating null). 
+pub const NAME_MAX = 256; // limits.h pub const STDIN_FILENO = 0; pub const STDOUT_FILENO = 1; diff --git a/lib/std/fs.zig b/lib/std/fs.zig index c6b992a162..8dfd811930 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -48,19 +48,22 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { @compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)), }; -/// This represents the maximum size of a UTF-8 encoded file name component that the -/// operating system will accept. All file name components returned by file system -/// operations are assumed to fit into a UTF-8 encoded array of this length. +/// This represents the maximum size of a UTF-8 encoded file name component that +/// the platform's common file systems support. File name components returned by file system +/// operations are likely to fit into a UTF-8 encoded array of this length, but +/// (depending on the platform) this assumption may not hold for every configuration. /// The byte count does not include a null sentinel byte. pub const MAX_NAME_BYTES = switch (builtin.os.tag) { - .linux, .macos, .ios, .freebsd, .dragonfly, .haiku => os.NAME_MAX, + .linux, .macos, .ios, .freebsd, .dragonfly => os.NAME_MAX, + // Haiku's NAME_MAX includes the null terminator, so subtract one. + .haiku => os.NAME_MAX - 1, .netbsd, .openbsd, .solaris => os.MAXNAMLEN, // Each UTF-16LE character may be expanded to 3 UTF-8 bytes. // If it would require 4 UTF-8 bytes, then there would be a surrogate // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. .windows => os.windows.NAME_MAX * 3, // For WASI, the MAX_NAME will depend on the host OS, so it needs to be - // as large as the largest MAX_NAME_BYTES in order to work on any host OS. + // as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS. 
// TODO determine if this is a reasonable approach .wasi => os.windows.NAME_MAX * 3, else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX")) diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index f7159a835c..f6168054b6 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -726,7 +726,7 @@ fn testFilenameLimits(iterable_dir: IterableDir, maxed_filename: []const u8) !vo try iterable_dir.dir.deleteTree(maxed_filename); } -test "filename limits" { +test "max file name component lengths" { var tmp = tmpIterableDir(.{}); defer tmp.cleanup(); @@ -737,7 +737,8 @@ test "filename limits" { try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename); } else if (builtin.os.tag == .wasi) { // On WASI, the maxed filename depends on the host OS, so in order for this test to - // work on any host, we need to use a length that will work for all platforms. + // work on any host, we need to use a length that will work for all platforms + // (i.e. the minimum MAX_NAME_BYTES of all supported platforms). const maxed_wasi_filename = [_]u8{'1'} ** 255; try testFilenameLimits(tmp.iterable_dir, &maxed_wasi_filename); } else { diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 7bed48a2bf..71dfc70d37 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -2981,6 +2981,18 @@ pub const PATH_MAX_WIDE = 32767; /// > lpMaximumComponentLength parameter of the GetVolumeInformation function /// > (this value is commonly 255 characters) /// from https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation +/// +/// > The value that is stored in the variable that *lpMaximumComponentLength points to is +/// > used to indicate that a specified file system supports long names. For example, for +/// > a FAT file system that supports long names, the function stores the value 255, rather +/// > than the previous 8.3 indicator. Long names can also be supported on systems that use +/// > the NTFS file system. 
+/// from https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getvolumeinformationw +/// +/// The assumption being made here is that while lpMaximumComponentLength may vary, it will never +/// be larger than 255. +/// +/// TODO: More verification of this assumption. pub const NAME_MAX = 255; pub const FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100;