From 25a0648176ce0e0061a035fdf602795368a3b40e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 24 Jun 2025 07:48:52 -0700 Subject: [PATCH] std: move copy_file_range, fcopyfile impls and usage --- lib/std/fs/Dir.zig | 102 +++++++++++++---------------------------- lib/std/fs/File.zig | 86 ++++++++++++++++++++++++++++++++-- lib/std/os.zig | 1 + lib/std/os/freebsd.zig | 49 ++++++++++++++++++++ lib/std/os/linux.zig | 67 ++++++++++++++++++++++++++- lib/std/posix.zig | 90 ------------------------------------ 6 files changed, 229 insertions(+), 166 deletions(-) create mode 100644 lib/std/os/freebsd.zig diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index 12f2ef835c..21926991a8 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -2609,7 +2609,7 @@ pub fn updateFile( try dest_dir.makePath(dirname); } - var buffer: [2000]u8 = undefined; + var buffer: [1000]u8 = undefined; // Used only when direct fd-to-fd is not available. var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = actual_mode, .write_buffer = &buffer, @@ -2619,7 +2619,7 @@ pub fn updateFile( var src_reader: File.Reader = .initSize(src_file, &.{}, src_stat.size); const dest_writer = &atomic_file.file_writer.interface; - dest_writer.writeFileAll(&src_reader, .{}) catch |err| switch (err) { + _ = dest_writer.sendFileAll(&src_reader, .unlimited) catch |err| switch (err) { error.ReadFailed => return src_reader.err.?, error.WriteFailed => return atomic_file.file_writer.err.?, }; @@ -2628,16 +2628,22 @@ pub fn updateFile( return .stale; } -pub const CopyFileError = File.OpenError || File.StatError || - AtomicFile.InitError || CopyFileRawError || AtomicFile.FinishError; +pub const CopyFileError = File.OpenError || File.StatError || File.ReadError || File.WriteError || + AtomicFile.InitError || AtomicFile.FinishError; -/// Guaranteed to be atomic. 
-/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available, -/// there is a possibility of power loss or application termination leaving temporary files present -/// in the same directory as dest_path. -/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). -/// On WASI, both paths should be encoded as valid UTF-8. -/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. +/// Atomically creates a new file at `dest_path` within `dest_dir` with the +/// same contents as `source_path` within `source_dir`, overwriting any already +/// existing file. +/// +/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and +/// readily available, there is a possibility of power loss or application +/// termination leaving temporary files present in the same directory as +/// dest_path. +/// +/// On Windows, both paths should be encoded as +/// [WTF-8](https://simonsapin.github.io/wtf-8/). On WASI, both paths should be +/// encoded as valid UTF-8. On other platforms, both paths are an opaque +/// sequence of bytes with no particular encoding. pub fn copyFile( source_dir: Dir, source_path: []const u8, @@ -2645,74 +2651,28 @@ pub fn copyFile( dest_path: []const u8, options: CopyFileOptions, ) CopyFileError!void { - var in_file = try source_dir.openFile(source_path, .{}); - defer in_file.close(); + var file_reader: File.Reader = .init(try source_dir.openFile(source_path, .{}), &.{}); + defer file_reader.file.close(); - var size: ?u64 = null; const mode = options.override_mode orelse blk: { - const st = try in_file.stat(); - size = st.size; + const st = try file_reader.file.stat(); + file_reader.size = st.size; break :blk st.mode; }; - var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode }); + var buffer: [1000]u8 = undefined; // Used only when direct fd-to-fd is not available. 
+ var atomic_file = try dest_dir.atomicFile(dest_path, .{ + .mode = mode, + .write_buffer = &buffer, + }); defer atomic_file.deinit(); - try copy_file(in_file.handle, atomic_file.file_writer.file.handle, size); + const size = atomic_file.file_writer.interface.sendFileAll(&file_reader, .unlimited) catch |err| switch (err) { + error.ReadFailed => return file_reader.err.?, + error.WriteFailed => return atomic_file.file_writer.err.?, + }; try atomic_file.finish(); -} - -const CopyFileRawError = error{SystemResources} || posix.CopyFileRangeError || posix.SendFileError; - -// Transfer all the data between two file descriptors in the most efficient way. -// The copy starts at offset 0, the initial offsets are preserved. -// No metadata is transferred over. -fn copy_file(fd_in: posix.fd_t, fd_out: posix.fd_t, maybe_size: ?u64) CopyFileRawError!void { - if (builtin.target.os.tag.isDarwin()) { - const rc = posix.system.fcopyfile(fd_in, fd_out, null, .{ .DATA = true }); - switch (posix.errno(rc)) { - .SUCCESS => return, - .INVAL => unreachable, - .NOMEM => return error.SystemResources, - // The source file is not a directory, symbolic link, or regular file. - // Try with the fallback path before giving up. - .OPNOTSUPP => {}, - else => |err| return posix.unexpectedErrno(err), - } - } - - if (native_os == .linux) { - // Try copy_file_range first as that works at the FS level and is the - // most efficient method (if available). - var offset: u64 = 0; - cfr_loop: while (true) { - // The kernel checks the u64 value `offset+count` for overflow, use - // a 32 bit value so that the syscall won't return EINVAL except for - // impossibly large files (> 2^64-1 - 2^32-1). 
- const amt = try posix.copy_file_range(fd_in, offset, fd_out, offset, std.math.maxInt(u32), 0); - // Terminate as soon as we have copied size bytes or no bytes - if (maybe_size) |s| { - if (s == amt) break :cfr_loop; - } - if (amt == 0) break :cfr_loop; - offset += amt; - } - return; - } - - // Sendfile is a zero-copy mechanism iff the OS supports it, otherwise the - // fallback code will copy the contents chunk by chunk. - const empty_iovec = [0]posix.iovec_const{}; - var offset: u64 = 0; - sendfile_loop: while (true) { - const amt = try posix.sendfile(fd_out, fd_in, offset, 0, &empty_iovec, &empty_iovec, 0); - // Terminate as soon as we have copied size bytes or no bytes - if (maybe_size) |s| { - if (s == amt) break :sendfile_loop; - } - if (amt == 0) break :sendfile_loop; - offset += amt; - } + _ = size; } pub const AtomicFileOptions = struct { diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig index 665e7de02a..3c50bfe757 100644 --- a/lib/std/fs/File.zig +++ b/lib/std/fs/File.zig @@ -961,7 +961,7 @@ pub const Reader = struct { }; } - pub fn initSize(file: File, buffer: []u8, size: u64) Reader { + pub fn initSize(file: File, buffer: []u8, size: ?u64) Reader { return .{ .file = file, .interface = initInterface(buffer), @@ -1099,7 +1099,6 @@ pub const Reader = struct { var iovecs_buffer: [max_buffers_len]posix.iovec = undefined; const dest = try w.writableVectorPosix(&iovecs_buffer, limit); assert(dest[0].len > 0); - // TODO also add buffer at the end const n = posix.readv(r.file.handle, dest) catch |err| { r.err = err; return error.ReadFailed; @@ -1251,6 +1250,8 @@ pub const Writer = struct { mode: Writer.Mode = .positional, pos: u64 = 0, sendfile_err: ?SendfileError = null, + copy_file_range_err: ?CopyFileRangeError = null, + fcopyfile_err: ?FcopyfileError = null, seek_err: ?SeekError = null, interface: std.io.Writer, @@ -1265,6 +1266,14 @@ pub const Writer = struct { Unexpected, }; + pub const CopyFileRangeError = std.os.freebsd.CopyFileRangeError || 
std.os.linux.wrapped.CopyFileRangeError; + + pub const FcopyfileError = error{ + OperationNotSupported, + OutOfMemory, + Unexpected, + }; + /// Number of slices to store on the stack, when trying to send as many byte /// vectors through the underlying write calls as possible. const max_buffers_len = 16; @@ -1408,7 +1417,6 @@ pub const Writer = struct { const w: *Writer = @fieldParentPtr("interface", io_writer); const out_fd = w.file.handle; const in_fd = file_reader.file.handle; - // TODO try using copy_file_range on Linux // TODO try using copy_file_range on FreeBSD // TODO try using sendfile on macOS // TODO try using sendfile on FreeBSD @@ -1416,7 +1424,8 @@ pub const Writer = struct { // Try using sendfile on Linux. if (w.sendfile_err != null) break :sf; // Linux sendfile does not support headers. - if (io_writer.end != 0) return drain(io_writer, &.{""}, 1); + const buffered = limit.slice(file_reader.interface.buffer); + if (io_writer.end != 0 or buffered.len != 0) return drain(io_writer, &.{buffered}, 1); const max_count = 0x7ffff000; // Avoid EINVAL. 
var off: std.os.linux.off_t = undefined; const off_ptr: ?*std.os.linux.off_t, const count: usize = switch (file_reader.mode) { @@ -1455,6 +1464,75 @@ pub const Writer = struct { w.pos += n; return n; } + const copy_file_range_fn = switch (native_os) { + .freebsd => std.os.freebsd.copy_file_range, + .linux => if (std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 })) std.os.linux.wrapped.copy_file_range else null, + else => null, + }; + if (copy_file_range_fn) |copy_file_range| cfr: { + if (w.copy_file_range_err != null) break :cfr; + const buffered = limit.slice(file_reader.interface.buffer); + if (io_writer.end != 0 or buffered.len != 0) return drain(io_writer, &.{buffered}, 1); + var off_in: i64 = undefined; + var off_out: i64 = undefined; + const off_in_ptr: ?*i64 = switch (file_reader.mode) { + .positional_reading, .streaming_reading => return error.Unimplemented, + .positional => p: { + off_in = file_reader.pos; + break :p &off_in; + }, + .streaming => null, + .failure => return error.WriteFailed, + }; + const off_out_ptr: ?*i64 = switch (w.mode) { + .positional_reading, .streaming_reading => return error.Unimplemented, + .positional => p: { + off_out = w.pos; + break :p &off_out; + }, + .streaming => null, + .failure => return error.WriteFailed, + }; + const n = copy_file_range(in_fd, off_in_ptr, out_fd, off_out_ptr, @intFromEnum(limit), 0) catch |err| { + w.copy_file_range_err = err; + return 0; + }; + file_reader.pos += n; + w.pos += n; + return n; + } + + if (builtin.os.tag.isDarwin()) fcf: { + if (w.fcopyfile_err != null) break :fcf; + if (file_reader.pos != 0) break :fcf; + if (w.pos != 0) break :fcf; + if (limit != .unlimited) break :fcf; + const rc = std.c.fcopyfile(in_fd, out_fd, null, .{ .DATA = true }); + switch (posix.errno(rc)) { + .SUCCESS => {}, + .INVAL => if (builtin.mode == .Debug) @panic("invalid API usage") else { + w.fcopyfile_err = error.Unexpected; + return 0; + }, + .NOMEM => { + w.fcopyfile_err = error.OutOfMemory; + return 0; + 
}, + .OPNOTSUPP => { + w.fcopyfile_err = error.OperationNotSupported; + return 0; + }, + else => |err| { + w.fcopyfile_err = posix.unexpectedErrno(err); + return 0; + }, + } + const n = if (file_reader.size) |size| size else @panic("TODO figure out how much copied"); + file_reader.pos = n; + w.pos = n; + return n; + } + return error.Unimplemented; } diff --git a/lib/std/os.zig b/lib/std/os.zig index 3cf0c745cc..7de672191a 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -31,6 +31,7 @@ pub const uefi = @import("os/uefi.zig"); pub const wasi = @import("os/wasi.zig"); pub const emscripten = @import("os/emscripten.zig"); pub const windows = @import("os/windows.zig"); +pub const freebsd = @import("os/freebsd.zig"); test { _ = linux; diff --git a/lib/std/os/freebsd.zig b/lib/std/os/freebsd.zig new file mode 100644 index 0000000000..cdc1973e04 --- /dev/null +++ b/lib/std/os/freebsd.zig @@ -0,0 +1,49 @@ +const std = @import("../std.zig"); +const fd_t = std.c.fd_t; +const off_t = std.c.off_t; +const unexpectedErrno = std.posix.unexpectedErrno; +const errno = std.posix.errno; + +pub const CopyFileRangeError = error{ + /// If infd is not open for reading or outfd is not open for writing, or + /// opened for writing with O_APPEND, or if infd and outfd refer to the + /// same file. + BadFileFlags, + /// If the copy exceeds the process's file size limit or the maximum + /// file size for the file system outfd resides on. + FileTooBig, + /// A signal interrupted the system call before it could be completed. + /// This may happen for files on some NFS mounts. When this happens, + /// the values pointed to by inoffp and outoffp are reset to the + /// initial values for the system call. + Interrupted, + /// One of: + /// * infd and outfd refer to the same file and the byte ranges overlap. + /// * The flags argument is not zero. + /// * Either infd or outfd refers to a file object that is not a regular file.
+ InvalidArguments, + /// An I/O error occurred while reading/writing the files. + InputOutput, + /// Corrupted data was detected while reading from a file system. + CorruptedData, + /// Either infd or outfd refers to a directory. + IsDir, + /// File system that stores outfd is full. + NoSpaceLeft, +}; + +pub fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, len: usize, flags: u32) CopyFileRangeError!usize { + const rc = std.c.copy_file_range(fd_in, off_in, fd_out, off_out, len, flags); + switch (errno(rc)) { + .SUCCESS => return @intCast(rc), + .BADF => return error.BadFileFlags, + .FBIG => return error.FileTooBig, + .INTR => return error.Interrupted, + .INVAL => return error.InvalidArguments, + .IO => return error.InputOutput, + .INTEGRITY => return error.CorruptedData, + .ISDIR => return error.IsDir, + .NOSPC => return error.NoSpaceLeft, + else => |err| return unexpectedErrno(err), + } +} diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 7296cfb873..75494145b9 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -9453,7 +9453,7 @@ pub const wrapped = struct { const sendfileSymbol = if (lfs64_abi) system.sendfile64 else system.sendfile; const rc = sendfileSymbol(out_fd, in_fd, in_offset, adjusted_len); switch (errno(rc)) { - .SUCCESS => return @bitCast(rc), + .SUCCESS => return @intCast(rc), .BADF => return invalidApiUsage(), // Always a race condition. .FAULT => return invalidApiUsage(), // Segmentation fault. .OVERFLOW => return unexpectedErrno(.OVERFLOW), // We avoid passing too large of a `count`. @@ -9469,6 +9469,71 @@ pub const wrapped = struct { } } + pub const CopyFileRangeError = std.posix.UnexpectedError || error{ + /// One of: + /// * One or more file descriptors are not valid. + /// * fd_in is not open for reading; or fd_out is not open for writing. + /// * The O_APPEND flag is set for the open file description referred + /// to by the file descriptor fd_out. 
+ BadFileFlags, + /// One of: + /// * An attempt was made to write at a position past the maximum file + /// offset the kernel supports. + /// * An attempt was made to write a range that exceeds the allowed + /// maximum file size. The maximum file size differs between + /// filesystem implementations and can be different from the maximum + /// allowed file offset. + /// * An attempt was made to write beyond the process's file size + /// resource limit. This may also result in the process receiving a + /// SIGXFSZ signal. + FileTooBig, + /// One of: + /// * either fd_in or fd_out is not a regular file + /// * flags argument is not zero + /// * fd_in and fd_out refer to the same file and the source and target ranges overlap. + InvalidArguments, + /// A low-level I/O error occurred while copying. + InputOutput, + /// Either fd_in or fd_out refers to a directory. + IsDir, + OutOfMemory, + /// There is not enough space on the target filesystem to complete the copy. + NoSpaceLeft, + /// (since Linux 5.19) the filesystem does not support this operation. + OperationNotSupported, + /// The requested source or destination range is too large to represent + /// in the specified data types. + Overflow, + /// fd_out refers to an immutable file. + PermissionDenied, + /// Either fd_in or fd_out refers to an active swap file. + SwapFile, + /// The files referred to by fd_in and fd_out are not on the same + /// filesystem, and the source and target filesystems are not of the + /// same type, or do not support cross-filesystem copy. 
+ NotSameFileSystem, + }; + + pub fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, len: usize, flags: u32) CopyFileRangeError!usize { + const rc = system.copy_file_range(fd_in, off_in, fd_out, off_out, len, flags); + switch (errno(rc)) { + .SUCCESS => return @intCast(rc), + .BADF => return error.BadFileFlags, + .FBIG => return error.FileTooBig, + .INVAL => return error.InvalidArguments, + .IO => return error.InputOutput, + .ISDIR => return error.IsDir, + .NOMEM => return error.OutOfMemory, + .NOSPC => return error.NoSpaceLeft, + .OPNOTSUPP => return error.OperationNotSupported, + .OVERFLOW => return error.Overflow, + .PERM => return error.PermissionDenied, + .TXTBSY => return error.SwapFile, + .XDEV => return error.NotSameFileSystem, + else => |err| return unexpectedErrno(err), + } + } + const unexpectedErrno = std.posix.unexpectedErrno; fn invalidApiUsage() error{Unexpected} { diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 36f9f12c23..059383a8d3 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -6601,96 +6601,6 @@ fn count_iovec_bytes(iovs: []const iovec_const) usize { return count; } -pub const CopyFileRangeError = error{ - FileTooBig, - InputOutput, - /// `fd_in` is not open for reading; or `fd_out` is not open for writing; - /// or the `APPEND` flag is set for `fd_out`. - FilesOpenedWithWrongFlags, - IsDir, - OutOfMemory, - NoSpaceLeft, - Unseekable, - PermissionDenied, - SwapFile, - CorruptedData, -} || PReadError || PWriteError || UnexpectedError; - -/// Transfer data between file descriptors at specified offsets. -/// -/// Returns the number of bytes written, which can less than requested. -/// -/// The `copy_file_range` call copies `len` bytes from one file descriptor to another. When possible, -/// this is done within the operating system kernel, which can provide better performance -/// characteristics than transferring data from kernel to user space and back, such as with -/// `pread` and `pwrite` calls. 
-/// -/// `fd_in` must be a file descriptor opened for reading, and `fd_out` must be a file descriptor -/// opened for writing. They may be any kind of file descriptor; however, if `fd_in` is not a regular -/// file system file, it may cause this function to fall back to calling `pread` and `pwrite`, in which case -/// atomicity guarantees no longer apply. -/// -/// If `fd_in` and `fd_out` are the same, source and target ranges must not overlap. -/// The file descriptor seek positions are ignored and not updated. -/// When `off_in` is past the end of the input file, it successfully reads 0 bytes. -/// -/// `flags` has different meanings per operating system; refer to the respective man pages. -/// -/// These systems support in-kernel data copying: -/// * Linux (cross-filesystem from version 5.3) -/// * FreeBSD 13.0 -/// -/// Other systems fall back to calling `pread` / `pwrite`. -/// -/// Maximum offsets on Linux and FreeBSD are `maxInt(i64)`. -pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize { - if (builtin.os.tag == .freebsd or - (comptime builtin.os.tag == .linux and std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 }))) - { - var off_in_copy: i64 = @bitCast(off_in); - var off_out_copy: i64 = @bitCast(off_out); - - while (true) { - const rc = system.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, flags); - if (native_os == .freebsd) { - switch (errno(rc)) { - .SUCCESS => return @intCast(rc), - .BADF => return error.FilesOpenedWithWrongFlags, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .ISDIR => return error.IsDir, - .NOSPC => return error.NoSpaceLeft, - .INVAL => break, // these may not be regular files, try fallback - .INTEGRITY => return error.CorruptedData, - .INTR => continue, - else => |err| return unexpectedErrno(err), - } - } else { // assume linux - switch (errno(rc)) { - .SUCCESS => return @intCast(rc), - .BADF => return 
error.FilesOpenedWithWrongFlags, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .ISDIR => return error.IsDir, - .NOSPC => return error.NoSpaceLeft, - .INVAL => break, // these may not be regular files, try fallback - .NOMEM => return error.OutOfMemory, - .OVERFLOW => return error.Unseekable, - .PERM => return error.PermissionDenied, - .TXTBSY => return error.SwapFile, - .XDEV => break, // support for cross-filesystem copy added in Linux 5.3, use fallback - else => |err| return unexpectedErrno(err), - } - } - } - } - - var buf: [8 * 4096]u8 = undefined; - const amt_read = try pread(fd_in, buf[0..@min(buf.len, len)], off_in); - if (amt_read == 0) return 0; - return pwrite(fd_out, buf[0..amt_read], off_out); -} - pub const PollError = error{ /// The network subsystem has failed. NetworkSubsystemFailed,