Merge pull request #13153 from squeek502/iterator-filename-limits

Windows: Fix Iterator name buffer size not handling all possible file name components
This commit is contained in:
Andrew Kelley 2022-10-29 23:50:37 -04:00 committed by GitHub
commit 209a0d2a83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 93 additions and 1 deletions

View File

@ -1014,6 +1014,7 @@ pub const vm_machine_attribute_val_t = isize;
pub const CALENDAR_CLOCK = 1;
pub const PATH_MAX = 1024;
/// Limit on the length of a single file name component.
/// NOTE(review): presumably counted in bytes and excluding the terminating null, per the
/// POSIX definition of NAME_MAX — confirm against this platform's limits header.
pub const NAME_MAX = 255;
pub const IOV_MAX = 16;
pub const STDIN_FILENO = 0;

View File

@ -234,6 +234,7 @@ pub const SA = struct {
};
pub const PATH_MAX = 1024;
/// Limit on the length of a single file name component.
/// NOTE(review): presumably counted in bytes and excluding the terminating null, per the
/// POSIX definition of NAME_MAX — confirm against this platform's limits header.
pub const NAME_MAX = 255;
/// Re-exported from the `KERN` namespace so callers can use the unqualified name.
pub const IOV_MAX = KERN.IOV_MAX;
pub const ino_t = c_ulong;

View File

@ -266,6 +266,7 @@ pub const area_info = extern struct {
};
/// Alias of `PATH_MAX`; always carries the same value.
pub const MAXPATHLEN = PATH_MAX;
/// Alias of `NAME_MAX`; always carries the same value.
pub const MAXNAMLEN = NAME_MAX;
pub const image_info = extern struct {
id: u32,
@ -371,6 +372,9 @@ pub const KERN = struct {};
pub const IOV_MAX = 1024;
pub const PATH_MAX = 1024;
/// NOTE: Contains room for the terminating null character (despite the POSIX
/// definition saying that NAME_MAX does not include the terminating null).
/// Code that needs the POSIX meaning has to subtract one, as `std.fs.MAX_NAME_BYTES`
/// does for `.haiku`. NOTE(review): assumes this is the Haiku definition — confirm.
pub const NAME_MAX = 256; // limits.h
pub const STDIN_FILENO = 0;
pub const STDOUT_FILENO = 1;

View File

@ -48,6 +48,30 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)),
};
/// Upper bound, in bytes, on the UTF-8 encoding of one file name component for the
/// file systems commonly used on the target platform.
///
/// Names handed back by file system operations will usually fit in a buffer of this
/// size, but on some platforms and configurations a name may still exceed it.
/// The null sentinel byte is not counted.
pub const MAX_NAME_BYTES = switch (builtin.os.tag) {
    // Windows limits are expressed in UTF-16LE code units. A single code unit expands
    // to at most 3 UTF-8 bytes; a code point needing 4 UTF-8 bytes occupies a surrogate
    // pair (two code units), so budgeting 3 bytes per unit (over)covers that case too.
    //
    // WASI inherits its limit from whichever host OS it runs on, so it is sized like
    // the largest MAX_NAME_BYTES of any host (Windows) in order to work everywhere.
    // TODO determine if this is a reasonable approach
    .windows, .wasi => os.windows.NAME_MAX * 3,

    // These targets expose the limit under the MAXNAMLEN name.
    .netbsd, .openbsd, .solaris => os.MAXNAMLEN,

    // Haiku's NAME_MAX already has room for the null terminator; drop that byte.
    .haiku => os.NAME_MAX - 1,

    .linux, .macos, .ios, .freebsd, .dragonfly => os.NAME_MAX,

    // Any other target has to supply the value through the root source file.
    else => if (@hasDecl(root, "os") and @hasDecl(root.os, "NAME_MAX"))
        root.os.NAME_MAX
    else
        @compileError("NAME_MAX not implemented for " ++ @tagName(builtin.os.tag)),
};
pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*;
/// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem.
@ -680,7 +704,7 @@ pub const IterableDir = struct {
index: usize,
end_index: usize,
first_iter: bool,
name_data: [256]u8,
name_data: [MAX_NAME_BYTES]u8,
const Self = @This();

View File

@ -703,6 +703,50 @@ test "makePath in a directory that no longer exists" {
try testing.expectError(error.FileNotFound, tmp.dir.makePath("sub-path"));
}
/// Test helper for file name length limits.
///
/// Creates a directory named `maxed_filename` inside `iterable_dir`, writes an empty file
/// with that same name into it, then walks `iterable_dir` and expects exactly two entries,
/// each with a basename equal to `maxed_filename`. Finally the created tree is deleted.
/// Any file system or expectation failure is returned to the caller.
fn testFilenameLimits(iterable_dir: IterableDir, maxed_filename: []const u8) !void {
    // Inner scope: the directory handle and the walker are both released before the
    // tree is deleted below.
    {
        var nested_dir = try iterable_dir.dir.makeOpenPath(maxed_filename, .{});
        defer nested_dir.close();

        try nested_dir.writeFile(maxed_filename, "");

        var dir_walker = try iterable_dir.walk(testing.allocator);
        defer dir_walker.deinit();

        var entries_seen: usize = 0;
        while (try dir_walker.next()) |entry| : (entries_seen += 1) {
            try testing.expectEqualStrings(maxed_filename, entry.basename);
        }
        try testing.expectEqual(@as(usize, 2), entries_seen);
    }

    // The tree must also be removable despite the maxed-out names.
    try iterable_dir.dir.deleteTree(maxed_filename);
}
// Runs `testFilenameLimits` with the longest file name component each target is expected
// to accept. The name is chosen per platform because the limit is counted differently.
test "max file name component lengths" {
    var tmp = tmpIterableDir(.{});
    defer tmp.cleanup();

    if (builtin.os.tag == .windows) {
        // U+FFFF is the character with the largest codepoint that is encoded as a single u16
        // in UTF-16, so Windows allows for NAME_MAX of them. Each one takes 3 bytes in UTF-8.
        // It is spelled as an escape sequence so the literal cannot be silently dropped by
        // editors or tooling that strip non-characters (which would leave an empty name).
        const maxed_windows_filename = ("\u{FFFF}".*) ** std.os.windows.NAME_MAX;
        try testFilenameLimits(tmp.iterable_dir, &maxed_windows_filename);
    } else if (builtin.os.tag == .wasi) {
        // On WASI, the maxed filename depends on the host OS, so in order for this test to
        // work on any host, we need to use a length that will work for all platforms
        // (i.e. the minimum MAX_NAME_BYTES of all supported platforms).
        const maxed_wasi_filename = [_]u8{'1'} ** 255;
        try testFilenameLimits(tmp.iterable_dir, &maxed_wasi_filename);
    } else {
        // Elsewhere, MAX_NAME_BYTES single-byte characters make up a maxed-out name.
        const maxed_ascii_filename = [_]u8{'1'} ** std.fs.MAX_NAME_BYTES;
        try testFilenameLimits(tmp.iterable_dir, &maxed_ascii_filename);
    }
}
test "writev, readv" {
var tmp = tmpDir(.{});
defer tmp.cleanup();

View File

@ -2977,6 +2977,24 @@ pub const PMEMORY_BASIC_INFORMATION = *MEMORY_BASIC_INFORMATION;
/// Maximum path length limit, taken
/// from https://docs.microsoft.com/en-us/windows/desktop/FileIO/naming-a-file#maximum-path-length-limitation
/// NOTE(review): the `_WIDE` suffix suggests the unit is UTF-16 code units rather than
/// bytes — confirm against the linked page.
pub const PATH_MAX_WIDE = 32767;
/// Assumed upper bound on the length of a single file name component, counted in
/// UTF-16 code units (not UTF-8 bytes).
///
/// > [Each file name component can be] up to the value returned in the
/// > lpMaximumComponentLength parameter of the GetVolumeInformation function
/// > (this value is commonly 255 characters)
/// from https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
///
/// > The value that is stored in the variable that *lpMaximumComponentLength points to is
/// > used to indicate that a specified file system supports long names. For example, for
/// > a FAT file system that supports long names, the function stores the value 255, rather
/// > than the previous 8.3 indicator. Long names can also be supported on systems that use
/// > the NTFS file system.
/// from https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getvolumeinformationw
///
/// The assumption being made here is that while lpMaximumComponentLength may vary, it will never
/// be larger than 255.
///
/// TODO: More verification of this assumption.
pub const NAME_MAX = 255;
// NOTE(review): these names match the `dwFlags` bits of the Win32 FormatMessage API;
// the values are not verified here — confirm against the Windows SDK headers.
pub const FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100;
pub const FORMAT_MESSAGE_ARGUMENT_ARRAY = 0x00002000;
pub const FORMAT_MESSAGE_FROM_HMODULE = 0x00000800;