From fd10baf748aeb3b6d2394701d735098e64638a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 19 Aug 2022 14:53:00 +0300 Subject: [PATCH] os.copy_file_range: save a syscall for most operations Currenty copy_file_range always uses at least two syscalls: 1. As many as it needs to do the initial copy (always 1 during my testing) 2. The last one is always when offset is the size of the file. The second syscall is used to detect the terminating condition. However, because we do a stat for other reasons, we know the size of the file, and we can skip the syscall. Sparse files: since copy_file_range expands holes of sparse files, I conclude that this layer was not intended to work with sparse files. In other words, this commit does not make it worse for sparse file society. Test program ------------ const std = @import("std"); pub fn main() !void { const arg1 = std.mem.span(std.os.argv[1]); const arg2 = std.mem.span(std.os.argv[2]); try std.fs.cwd().copyFile(arg1, std.fs.cwd(), arg2, .{}); } Test output (current master) ---------------------------- Observe two `copy_file_range` syscalls: one with 209 bytes, one with zero: $ zig build-exe cp.zig $ strace ./cp ./cp.zig ./cp2.zig |& grep copy_file_range copy_file_range(3, [0], 5, [0], 4294967295, 0) = 209 copy_file_range(3, [209], 5, [209], 4294967295, 0) = 0 $ Test output (this diff) ----------------------- Observe a single `copy_file_range` syscall with 209 bytes: $ /code/zig/build/zig build-exe cp.zig $ strace ./cp ./cp.zig ./cp2.zig |& grep copy_file_range copy_file_range(3, [0], 5, [0], 4294967295, 0) = 209 $ --- lib/std/fs.zig | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index f7027a70c9..dfadb144eb 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -2562,7 +2562,7 @@ pub const Dir = struct { var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode }); defer atomic_file.deinit(); - try copy_file(in_file.handle, atomic_file.file.handle); + try copy_file(in_file.handle, atomic_file.file.handle, size); try atomic_file.finish(); } @@ -3041,7 +3041,7 @@ const CopyFileRawError = error{SystemResources} || os.CopyFileRangeError || os.S // Transfer all the data between two file descriptors in the most efficient way. // The copy starts at offset 0, the initial offsets are preserved. // No metadata is transferred over. -fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void { +fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t, maybe_size: ?u64) CopyFileRawError!void { if (comptime builtin.target.isDarwin()) { const rc = os.system.fcopyfile(fd_in, fd_out, null, os.system.COPYFILE_DATA); switch (os.errno(rc)) { @@ -3064,7 +3064,10 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void { // a 32 bit value so that the syscall won't return EINVAL except for // impossibly large files (> 2^64-1 - 2^32-1). const amt = try os.copy_file_range(fd_in, offset, fd_out, offset, math.maxInt(u32), 0); - // Terminate when no data was copied + // Terminate as soon as we have copied size bytes or no bytes + if (maybe_size) |s| { + if (s == amt) break :cfr_loop; + } if (amt == 0) break :cfr_loop; offset += amt; } @@ -3077,7 +3080,10 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileRawError!void { var offset: u64 = 0; sendfile_loop: while (true) { const amt = try os.sendfile(fd_out, fd_in, offset, 0, &empty_iovec, &empty_iovec, 0); - // Terminate when no data was copied + // Terminate as soon as we have copied size bytes or no bytes + if (maybe_size) |s| { + if (s == amt) break :sendfile_loop; + } if (amt == 0) break :sendfile_loop; offset += amt; }