Merge branch 'gereeter-reduced-path-max'

closes #4837
This commit is contained in:
Andrew Kelley 2020-05-29 18:41:40 -04:00
commit f107d654e0
5 changed files with 98 additions and 37 deletions

View File

@ -670,10 +670,12 @@ pub fn openSelfDebugInfo(allocator: *mem.Allocator) anyerror!DebugInfo {
} }
} }
/// This takes ownership of coff_file: users of this function should not close
/// it themselves, even on error.
/// TODO resources https://github.com/ziglang/zig/issues/4353 /// TODO resources https://github.com/ziglang/zig/issues/4353
fn openCoffDebugInfo(allocator: *mem.Allocator, coff_file_path: [:0]const u16) !ModuleDebugInfo { /// TODO it's weird to take ownership even on error, rework this code.
fn readCoffDebugInfo(allocator: *mem.Allocator, coff_file: File) !ModuleDebugInfo {
nosuspend { nosuspend {
const coff_file = try std.fs.openFileAbsoluteW(coff_file_path, .{ .intended_io_mode = .blocking });
errdefer coff_file.close(); errdefer coff_file.close();
const coff_obj = try allocator.create(coff.Coff); const coff_obj = try allocator.create(coff.Coff);
@ -851,10 +853,13 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) ![]const u8 {
return ptr[start..end]; return ptr[start..end];
} }
/// This takes ownership of elf_file: users of this function should not close
/// it themselves, even on error.
/// TODO resources https://github.com/ziglang/zig/issues/4353 /// TODO resources https://github.com/ziglang/zig/issues/4353
pub fn openElfDebugInfo(allocator: *mem.Allocator, elf_file_path: []const u8) !ModuleDebugInfo { /// TODO it's weird to take ownership even on error, rework this code.
pub fn readElfDebugInfo(allocator: *mem.Allocator, elf_file: File) !ModuleDebugInfo {
nosuspend { nosuspend {
const mapped_mem = try mapWholeFile(elf_file_path); const mapped_mem = try mapWholeFile(elf_file);
const hdr = @ptrCast(*const elf.Ehdr, &mapped_mem[0]); const hdr = @ptrCast(*const elf.Ehdr, &mapped_mem[0]);
if (!mem.eql(u8, hdr.e_ident[0..4], "\x7fELF")) return error.InvalidElfMagic; if (!mem.eql(u8, hdr.e_ident[0..4], "\x7fELF")) return error.InvalidElfMagic;
if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
@ -921,8 +926,11 @@ pub fn openElfDebugInfo(allocator: *mem.Allocator, elf_file_path: []const u8) !M
} }
/// TODO resources https://github.com/ziglang/zig/issues/4353 /// TODO resources https://github.com/ziglang/zig/issues/4353
fn openMachODebugInfo(allocator: *mem.Allocator, macho_file_path: []const u8) !ModuleDebugInfo { /// This takes ownership of macho_file: users of this function should not close
const mapped_mem = try mapWholeFile(macho_file_path); /// it themselves, even on error.
/// TODO it's weird to take ownership even on error, rework this code.
fn readMachODebugInfo(allocator: *mem.Allocator, macho_file: File) !ModuleDebugInfo {
const mapped_mem = try mapWholeFile(macho_file);
const hdr = @ptrCast( const hdr = @ptrCast(
*const macho.mach_header_64, *const macho.mach_header_64,
@ -1055,9 +1063,11 @@ const MachoSymbol = struct {
} }
}; };
fn mapWholeFile(path: []const u8) ![]align(mem.page_size) const u8 { /// `file` is expected to have been opened with .intended_io_mode == .blocking.
/// Takes ownership of file, even on error.
/// TODO it's weird to take ownership even on error, rework this code.
fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 {
nosuspend { nosuspend {
const file = try fs.cwd().openFile(path, .{ .intended_io_mode = .blocking });
defer file.close(); defer file.close();
const file_len = try math.cast(usize, try file.getEndPos()); const file_len = try math.cast(usize, try file.getEndPos());
@ -1140,10 +1150,11 @@ pub const DebugInfo = struct {
errdefer self.allocator.destroy(obj_di); errdefer self.allocator.destroy(obj_di);
const macho_path = mem.spanZ(std.c._dyld_get_image_name(i)); const macho_path = mem.spanZ(std.c._dyld_get_image_name(i));
obj_di.* = openMachODebugInfo(self.allocator, macho_path) catch |err| switch (err) { const macho_file = fs.cwd().openFile(macho_path, .{ .intended_io_mode = .blocking }) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo, error.FileNotFound => return error.MissingDebugInfo,
else => return err, else => return err,
}; };
obj_di.* = try readMachODebugInfo(self.allocator, macho_file);
obj_di.base_address = base_address; obj_di.base_address = base_address;
try self.address_map.putNoClobber(base_address, obj_di); try self.address_map.putNoClobber(base_address, obj_di);
@ -1221,10 +1232,11 @@ pub const DebugInfo = struct {
const obj_di = try self.allocator.create(ModuleDebugInfo); const obj_di = try self.allocator.create(ModuleDebugInfo);
errdefer self.allocator.destroy(obj_di); errdefer self.allocator.destroy(obj_di);
obj_di.* = openCoffDebugInfo(self.allocator, name_buffer[0 .. len + 4 :0]) catch |err| switch (err) { const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo, error.FileNotFound => return error.MissingDebugInfo,
else => return err, else => return err,
}; };
obj_di.* = try readCoffDebugInfo(self.allocator, coff_file);
obj_di.base_address = seg_start; obj_di.base_address = seg_start;
try self.address_map.putNoClobber(seg_start, obj_di); try self.address_map.putNoClobber(seg_start, obj_di);
@ -1280,20 +1292,18 @@ pub const DebugInfo = struct {
return obj_di; return obj_di;
} }
const elf_path = if (ctx.name.len > 0)
ctx.name
else blk: {
var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
break :blk try fs.selfExePath(&buf);
};
const obj_di = try self.allocator.create(ModuleDebugInfo); const obj_di = try self.allocator.create(ModuleDebugInfo);
errdefer self.allocator.destroy(obj_di); errdefer self.allocator.destroy(obj_di);
obj_di.* = openElfDebugInfo(self.allocator, elf_path) catch |err| switch (err) { const elf_file = (if (ctx.name.len > 0)
fs.cwd().openFile(ctx.name, .{ .intended_io_mode = .blocking })
else
fs.openSelfExe(.{ .intended_io_mode = .blocking })) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo, error.FileNotFound => return error.MissingDebugInfo,
else => return err, else => return err,
}; };
obj_di.* = try readElfDebugInfo(self.allocator, elf_file);
obj_di.base_address = ctx.base_address; obj_di.base_address = ctx.base_address;
try self.address_map.putNoClobber(ctx.base_address, obj_di); try self.address_map.putNoClobber(ctx.base_address, obj_di);
@ -1329,7 +1339,8 @@ pub const ModuleDebugInfo = switch (builtin.os.tag) {
} }
fn loadOFile(self: *@This(), o_file_path: []const u8) !DW.DwarfInfo { fn loadOFile(self: *@This(), o_file_path: []const u8) !DW.DwarfInfo {
const mapped_mem = try mapWholeFile(o_file_path); const o_file = try fs.cwd().openFile(o_file_path, .{ .intended_io_mode = .blocking });
const mapped_mem = try mapWholeFile(o_file);
const hdr = @ptrCast( const hdr = @ptrCast(
*const macho.mach_header_64, *const macho.mach_header_64,

View File

@ -33,8 +33,11 @@ pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirE
pub const Watch = @import("fs/watch.zig").Watch; pub const Watch = @import("fs/watch.zig").Watch;
/// This represents the maximum size of a UTF-8 encoded file path. /// This represents the maximum size of a UTF-8 encoded file path that the
/// All file system operations which return a path are guaranteed to /// operating system will accept. Paths, including those returned from file
/// system operations, may be longer than this length, but such paths cannot
/// be successfully passed back in other file system operations. However,
/// all path components returned by file system operations are assumed to
/// fit into a UTF-8 encoded array of this length. /// fit into a UTF-8 encoded array of this length.
/// The byte count includes room for a null sentinel byte. /// The byte count includes room for a null sentinel byte.
pub const MAX_PATH_BYTES = switch (builtin.os.tag) { pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@ -1194,7 +1197,7 @@ pub const Dir = struct {
/// Reads the target of the symbolic link at `sub_path`, resolved relative to
/// this directory.
/// The returned slice aliases `buffer`, starting at index `0`.
/// Asserts that `sub_path` contains no null bytes.
pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
    // Copy the path into a null-terminated buffer, then defer to the
    // null-terminated variant.
    const posix_path = try os.toPosixPath(sub_path);
    const link_target = try self.readLinkZ(&posix_path, buffer);
    return link_target;
}
@ -1202,7 +1205,7 @@ pub const Dir = struct {
pub const readLinkC = @compileError("deprecated: renamed to readLinkZ"); pub const readLinkC = @compileError("deprecated: renamed to readLinkZ");
/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
/// The returned slice aliases `buffer`.
pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
    const dir_fd = self.fd;
    return os.readlinkatZ(dir_fd, sub_path_c, buffer);
}
@ -1320,6 +1323,9 @@ pub const Dir = struct {
var cleanup_dir = true; var cleanup_dir = true;
defer if (cleanup_dir) dir.close(); defer if (cleanup_dir) dir.close();
// Valid use of MAX_PATH_BYTES because dir_name_buf will only
// ever store a single path component that was returned from the
// filesystem.
var dir_name_buf: [MAX_PATH_BYTES]u8 = undefined; var dir_name_buf: [MAX_PATH_BYTES]u8 = undefined;
var dir_name: []const u8 = sub_path; var dir_name: []const u8 = sub_path;
@ -1772,19 +1778,21 @@ pub fn walkPath(allocator: *Allocator, dir_path: []const u8) !Walker {
pub const OpenSelfExeError = os.OpenError || os.windows.CreateFileError || SelfExePathError || os.FlockError; pub const OpenSelfExeError = os.OpenError || os.windows.CreateFileError || SelfExePathError || os.FlockError;
/// Opens the file of the currently running executable using `flags`.
/// The caller owns the returned `File` and is responsible for closing it.
pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
    switch (builtin.os.tag) {
        // procfs exposes the running binary directly; no path lookup needed.
        .linux => return openFileAbsoluteZ("/proc/self/exe", flags),
        .windows => {
            const exe_path_w = selfExePathW();
            const prefixed = try os.windows.wToPrefixedFileW(exe_path_w);
            return cwd().openFileW(prefixed.span(), flags);
        },
        else => {
            // Use of MAX_PATH_BYTES here is valid as the resulting path is
            // immediately opened with no modification.
            var path_buf: [MAX_PATH_BYTES]u8 = undefined;
            const exe_path = try selfExePath(&path_buf);
            // Null-terminate in place so the Z variant can be used.
            path_buf[exe_path.len] = 0;
            const exe_path_z = path_buf[0..exe_path.len :0];
            return openFileAbsoluteZ(exe_path_z.ptr, flags);
        },
    }
}
pub const SelfExePathError = os.ReadLinkError || os.SysCtlError; pub const SelfExePathError = os.ReadLinkError || os.SysCtlError;
@ -1792,6 +1800,13 @@ pub const SelfExePathError = os.ReadLinkError || os.SysCtlError;
/// Like `selfExePath`, but the result is copied to the heap.
/// Caller owns returned memory.
pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
    // A fixed MAX_PATH_BYTES buffer is justified here: on at least one tested
    // Linux system, readlink completely fails to return a result larger than
    // PATH_MAX even when given a sufficiently large buffer, so the
    // selfExePath of a program in a very deeply nested directory chain
    // cannot be obtained this way regardless of buffer size.
    // TODO(#4812): Investigate other systems and whether it is possible to get
    // this path by trying larger and larger buffers until one succeeds.
    var path_buf: [MAX_PATH_BYTES]u8 = undefined;
    const exe_path = try selfExePath(&path_buf);
    return mem.dupe(allocator, u8, exe_path);
}
@ -1806,10 +1821,10 @@ pub fn selfExePathAlloc(allocator: *Allocator) ![]u8 {
/// On Linux, depends on procfs being mounted. If the currently executing binary has /// On Linux, depends on procfs being mounted. If the currently executing binary has
/// been deleted, the file path looks something like `/a/b/c/exe (deleted)`. /// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.
/// TODO make the return type of this a null terminated pointer /// TODO make the return type of this a null terminated pointer
pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 { pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (is_darwin) { if (is_darwin) {
var u32_len: u32 = out_buffer.len; var u32_len: u32 = @intCast(u32, math.min(out_buffer.len, math.maxInt(u32)));
const rc = std.c._NSGetExecutablePath(out_buffer, &u32_len); const rc = std.c._NSGetExecutablePath(out_buffer.ptr, &u32_len);
if (rc != 0) return error.NameTooLong; if (rc != 0) return error.NameTooLong;
return mem.spanZ(@ptrCast([*:0]u8, out_buffer)); return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
} }
@ -1818,14 +1833,14 @@ pub fn selfExePath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]u8 {
.freebsd, .dragonfly => { .freebsd, .dragonfly => {
var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC, os.KERN_PROC_PATHNAME, -1 }; var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC, os.KERN_PROC_PATHNAME, -1 };
var out_len: usize = out_buffer.len; var out_len: usize = out_buffer.len;
try os.sysctl(&mib, out_buffer, &out_len, null, 0); try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
// TODO could this slice from 0 to out_len instead? // TODO could this slice from 0 to out_len instead?
return mem.spanZ(@ptrCast([*:0]u8, out_buffer)); return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
}, },
.netbsd => { .netbsd => {
var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC_ARGS, -1, os.KERN_PROC_PATHNAME }; var mib = [4]c_int{ os.CTL_KERN, os.KERN_PROC_ARGS, -1, os.KERN_PROC_PATHNAME };
var out_len: usize = out_buffer.len; var out_len: usize = out_buffer.len;
try os.sysctl(&mib, out_buffer, &out_len, null, 0); try os.sysctl(&mib, out_buffer.ptr, &out_len, null, 0);
// TODO could this slice from 0 to out_len instead? // TODO could this slice from 0 to out_len instead?
return mem.spanZ(@ptrCast([*:0]u8, out_buffer)); return mem.spanZ(@ptrCast([*:0]u8, out_buffer));
}, },
@ -1848,13 +1863,20 @@ pub fn selfExePathW() [:0]const u16 {
/// Like `selfExeDirPath`, but the result is copied to the heap.
/// Caller owns returned memory.
pub fn selfExeDirPathAlloc(allocator: *Allocator) ![]u8 {
    // A fixed MAX_PATH_BYTES buffer is justified here: on at least one tested
    // Linux system, readlink completely fails to return a result larger than
    // PATH_MAX even when given a sufficiently large buffer, so the
    // selfExeDirPath of a program in a very deeply nested directory chain
    // cannot be obtained this way regardless of buffer size.
    // TODO(#4812): Investigate other systems and whether it is possible to get
    // this path by trying larger and larger buffers until one succeeds.
    var path_buf: [MAX_PATH_BYTES]u8 = undefined;
    const exe_dir = try selfExeDirPath(&path_buf);
    return mem.dupe(allocator, u8, exe_dir);
}
/// Get the directory path that contains the current executable. /// Get the directory path that contains the current executable.
/// Returned value is a slice of out_buffer. /// Returned value is a slice of out_buffer.
pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const u8 { pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
const self_exe_path = try selfExePath(out_buffer); const self_exe_path = try selfExePath(out_buffer);
// Assume that the OS APIs return absolute paths, and therefore dirname // Assume that the OS APIs return absolute paths, and therefore dirname
// will not return null. // will not return null.
@ -1864,6 +1886,12 @@ pub fn selfExeDirPath(out_buffer: *[MAX_PATH_BYTES]u8) SelfExePathError![]const
/// Like `realpath`, except the result is heap-allocated and the caller must
/// free the returned memory.
/// TODO integrate with `Dir`
pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 {
    // Use of MAX_PATH_BYTES here is valid as the realpath function does not
    // have a variant that takes an arbitrary-size buffer.
    // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
    // NULL out parameter (GNU's canonicalize_file_name) to handle overlong
    // paths. musl supports passing NULL but restricts the output to PATH_MAX
    // anyway.
    var path_buf: [MAX_PATH_BYTES]u8 = undefined;
    const resolved = try os.realpath(pathname, &path_buf);
    return mem.dupe(allocator, u8, resolved);
}

View File

@ -6,7 +6,7 @@ const File = std.fs.File;
test "openSelfExe" {
    // Skipped on WASI.
    if (builtin.os.tag == .wasi) return error.SkipZigTest;

    // Opening with default flags must succeed; the handle is released on exit.
    const exe_file = try std.fs.openSelfExe(.{});
    defer exe_file.close();
}

View File

@ -1236,6 +1236,8 @@ pub fn execvpeZ_expandArg0(
if (mem.indexOfScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp); if (mem.indexOfScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp);
const PATH = getenvZ("PATH") orelse "/usr/local/bin:/bin/:/usr/bin"; const PATH = getenvZ("PATH") orelse "/usr/local/bin:/bin/:/usr/bin";
// Use of MAX_PATH_BYTES here is valid as the path_buf will be passed
// directly to the operating system in execveZ.
var path_buf: [MAX_PATH_BYTES]u8 = undefined; var path_buf: [MAX_PATH_BYTES]u8 = undefined;
var it = mem.tokenize(PATH, ":"); var it = mem.tokenize(PATH, ":");
var seen_eacces = false; var seen_eacces = false;

View File

@ -15,14 +15,34 @@ pub const changeCurDir = os.chdir;
pub const changeCurDirC = os.chdirC; pub const changeCurDirC = os.chdirC;
/// Writes the current working directory into `out_buffer`.
/// The result is a slice of `out_buffer`, from index `0`.
pub fn getCwd(out_buffer: []u8) ![]u8 {
    const cwd_slice = try os.getcwd(out_buffer);
    return cwd_slice;
}
/// Returns the current working directory as a newly allocated slice.
/// Caller must free the returned memory.
/// Paths longer than `fs.MAX_PATH_BYTES` are handled by retrying with
/// geometrically growing heap buffers.
pub fn getCwdAlloc(allocator: *Allocator) ![]u8 {
    // The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit
    // in stack_buf, avoiding an extra allocation in the common case.
    var stack_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    // Scratch buffer currently owned by this function, if any. It is always
    // either null or a live allocation, so the deferred free is safe on every
    // exit path.
    var heap_buf: ?[]u8 = null;
    defer if (heap_buf) |buf| allocator.free(buf);

    var current_buf: []u8 = &stack_buf;
    while (true) {
        if (os.getcwd(current_buf)) |slice| {
            return mem.dupe(allocator, u8, slice);
        } else |err| switch (err) {
            error.NameTooLong => {
                // The path is too long to fit in current_buf. Allocate
                // geometrically increasing buffers until we find one that works.
                const new_capacity = current_buf.len * 2;
                if (heap_buf) |buf| {
                    allocator.free(buf);
                    // Clear the handle before the fallible alloc below;
                    // otherwise an allocation failure would make the deferred
                    // cleanup free this buffer a second time.
                    heap_buf = null;
                }
                const grown = try allocator.alloc(u8, new_capacity);
                heap_buf = grown;
                current_buf = grown;
            },
            else => |e| return e,
        }
    }
}
test "getCwdAlloc" { test "getCwdAlloc" {