breaking: std.os read/write functions + sendfile

* rework os.sendfile and add macosx support, and a fallback
   implementation for any OS.
 * fix sendto compile error
 * std.os write functions support partial writes. closes #3443.
 * std.os pread / pwrite functions can now return `error.Unseekable`.
 * std.fs.File read/write functions now have readAll/writeAll variants
   which loop to complete operations even when partial reads/writes
   happen.
 * Audit std.os read/write functions with respect to Linux returning
   EINVAL for lengths greater than 0x7fff0000.
 * std.os read/write shim functions do not unnecessarily loop. Since
   partial reads/writes are part of the API, the caller will be forced
   to loop anyway, and so that would just be code bloat.
 * Improve doc comments
 * Add a non-trivial test for std.os.sendfile
 * Fix std.os.pread on 32 bit Linux
 * Add missing SYS_sendfile bit on aarch64
This commit is contained in:
Andrew Kelley 2020-03-03 02:03:22 -05:00
parent bd287dd194
commit c81345c8ae
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
20 changed files with 834 additions and 370 deletions

View File

@ -142,7 +142,7 @@ pub extern "c" fn sendto(
buf: *const c_void,
len: usize,
flags: u32,
dest_addr: *const sockaddr,
dest_addr: ?*const sockaddr,
addrlen: socklen_t,
) isize;

View File

@ -55,6 +55,22 @@ pub extern "c" fn clock_get_time(clock_serv: clock_serv_t, cur_time: *mach_times
pub extern "c" fn host_get_clock_service(host: host_t, clock_id: clock_id_t, clock_serv: ?[*]clock_serv_t) kern_return_t;
pub extern "c" fn mach_port_deallocate(task: ipc_space_t, name: mach_port_name_t) kern_return_t;
pub const sf_hdtr = extern struct {
headers: [*]iovec_const,
hdr_cnt: c_int,
trailers: [*]iovec_const,
trl_cnt: c_int,
};
pub extern "c" fn sendfile(
out_fd: fd_t,
in_fd: fd_t,
offset: off_t,
len: *off_t,
sf_hdtr: ?*sf_hdtr,
flags: u32,
) c_int;
pub fn sigaddset(set: *sigset_t, signo: u5) void {
set.* |= @as(u32, 1) << (signo - 1);
}

View File

@ -14,7 +14,15 @@ pub const sf_hdtr = extern struct {
trailers: [*]iovec_const,
trl_cnt: c_int,
};
pub extern "c" fn sendfile(fd: c_int, s: c_int, offset: u64, nbytes: usize, sf_hdtr: ?*sf_hdtr, sbytes: ?*u64, flags: c_int) c_int;
pub extern "c" fn sendfile(
out_fd: fd_t,
in_fd: fd_t,
offset: ?*off_t,
nbytes: usize,
sf_hdtr: ?*sf_hdtr,
sbytes: ?*off_t,
flags: u32,
) c_int;
pub const dl_iterate_phdr_callback = extern fn (info: *dl_phdr_info, size: usize, data: ?*c_void) c_int;
pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*c_void) c_int;

View File

@ -82,6 +82,13 @@ pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
pub extern "c" fn memfd_create(name: [*:0]const u8, flags: c_uint) c_int;
pub extern "c" fn sendfile(
out_fd: fd_t,
in_fd: fd_t,
offset: ?*off_t,
count: usize,
) isize;
pub const pthread_attr_t = extern struct {
__size: [56]u8,
__align: c_long,

View File

@ -236,7 +236,8 @@ pub const Loop = struct {
var extra_thread_index: usize = 0;
errdefer {
// writing 8 bytes to an eventfd cannot fail
noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
const amt = noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
assert(amt == wakeup_bytes.len);
while (extra_thread_index != 0) {
extra_thread_index -= 1;
self.extra_threads[extra_thread_index].wait();
@ -682,7 +683,8 @@ pub const Loop = struct {
.linux => {
self.posixFsRequest(&self.os_data.fs_end_request);
// writing 8 bytes to an eventfd cannot fail
noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
const amt = noasync os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
assert(amt == wakeup_bytes.len);
return;
},
.macosx, .freebsd, .netbsd, .dragonfly => {
@ -831,7 +833,7 @@ pub const Loop = struct {
/// Performs an async `os.write` using a separate thread.
/// `fd` must block and not return EAGAIN.
pub fn write(self: *Loop, fd: os.fd_t, bytes: []const u8) os.WriteError!void {
pub fn write(self: *Loop, fd: os.fd_t, bytes: []const u8) os.WriteError!usize {
var req_node = Request.Node{
.data = .{
.msg = .{
@ -852,7 +854,7 @@ pub const Loop = struct {
/// Performs an async `os.writev` using a separate thread.
/// `fd` must block and not return EAGAIN.
pub fn writev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const) os.WriteError!void {
pub fn writev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const) os.WriteError!usize {
var req_node = Request.Node{
.data = .{
.msg = .{
@ -873,7 +875,7 @@ pub const Loop = struct {
/// Performs an async `os.pwritev` using a separate thread.
/// `fd` must block and not return EAGAIN.
pub fn pwritev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, offset: u64) os.WriteError!void {
pub fn pwritev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, offset: u64) os.WriteError!usize {
var req_node = Request.Node{
.data = .{
.msg = .{
@ -1137,7 +1139,7 @@ pub const Loop = struct {
pub const Write = struct {
fd: os.fd_t,
bytes: []const u8,
result: Error!void,
result: Error!usize,
pub const Error = os.WriteError;
};
@ -1145,7 +1147,7 @@ pub const Loop = struct {
pub const WriteV = struct {
fd: os.fd_t,
iov: []const os.iovec_const,
result: Error!void,
result: Error!usize,
pub const Error = os.WriteError;
};
@ -1154,9 +1156,9 @@ pub const Loop = struct {
fd: os.fd_t,
iov: []const os.iovec_const,
offset: usize,
result: Error!void,
result: Error!usize,
pub const Error = os.WriteError;
pub const Error = os.PWriteError;
};
pub const PReadV = struct {
@ -1165,7 +1167,7 @@ pub const Loop = struct {
offset: usize,
result: Error!usize,
pub const Error = os.ReadError;
pub const Error = os.PReadError;
};
pub const Open = struct {

View File

@ -153,7 +153,7 @@ pub fn updateFileMode(source_path: []const u8, dest_path: []const u8, mode: ?Fil
var buf: [mem.page_size * 6]u8 = undefined;
while (true) {
const amt = try in_stream.readFull(buf[0..]);
try atomic_file.file.write(buf[0..amt]);
try atomic_file.file.writeAll(buf[0..amt]);
if (amt != buf.len) {
try atomic_file.file.updateTimes(src_stat.atime, src_stat.mtime);
try atomic_file.finish();
@ -1329,7 +1329,7 @@ pub const Dir = struct {
pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) !void {
var file = try self.createFile(sub_path, .{});
defer file.close();
try file.write(data);
try file.writeAll(data);
}
pub const AccessError = os.AccessError;

View File

@ -228,63 +228,169 @@ pub const File = struct {
}
pub const ReadError = os.ReadError;
pub const PReadError = os.PReadError;
pub fn read(self: File, buffer: []u8) ReadError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.read(self.handle, buffer);
} else {
return os.read(self.handle, buffer);
}
return os.read(self.handle, buffer);
}
pub fn pread(self: File, buffer: []u8, offset: u64) ReadError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.pread(self.handle, buffer);
pub fn readAll(self: File, buffer: []u8) ReadError!void {
var index: usize = 0;
while (index < buffer.len) {
index += try self.read(buffer[index..]);
}
}
pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.pread(self.handle, buffer, offset);
} else {
return os.pread(self.handle, buffer, offset);
}
}
pub fn preadAll(self: File, buffer: []u8, offset: u64) PReadError!void {
var index: usize = 0;
while (index < buffer.len) {
index += try self.pread(buffer[index..], offset + index);
}
return os.pread(self.handle, buffer, offset);
}
pub fn readv(self: File, iovecs: []const os.iovec) ReadError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.readv(self.handle, iovecs);
} else {
return os.readv(self.handle, iovecs);
}
return os.readv(self.handle, iovecs);
}
pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) ReadError!usize {
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
/// order to handle partial reads from the underlying OS layer.
pub fn readvAll(self: File, iovecs: []os.iovec) ReadError!void {
var i: usize = 0;
while (true) {
var amt = try self.readv(iovecs[i..]);
while (amt >= iovecs[i].iov_len) {
amt -= iovecs[i].iov_len;
i += 1;
if (i >= iovecs.len) return;
}
iovecs[i].iov_base += amt;
iovecs[i].iov_len -= amt;
}
}
pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) PReadError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.preadv(self.handle, iovecs, offset);
} else {
return os.preadv(self.handle, iovecs, offset);
}
}
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
/// order to handle partial reads from the underlying OS layer.
pub fn preadvAll(self: File, iovecs: []const os.iovec, offset: u64) PReadError!void {
var i: usize = 0;
var off: usize = 0;
while (true) {
var amt = try self.preadv(iovecs[i..], offset + off);
off += amt;
while (amt >= iovecs[i].iov_len) {
amt -= iovecs[i].iov_len;
i += 1;
if (i >= iovecs.len) return;
}
iovecs[i].iov_base += amt;
iovecs[i].iov_len -= amt;
}
return os.preadv(self.handle, iovecs, offset);
}
pub const WriteError = os.WriteError;
pub const PWriteError = os.PWriteError;
pub fn write(self: File, bytes: []const u8) WriteError!void {
pub fn write(self: File, bytes: []const u8) WriteError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.write(self.handle, bytes);
} else {
return os.write(self.handle, bytes);
}
return os.write(self.handle, bytes);
}
pub fn pwrite(self: File, bytes: []const u8, offset: u64) WriteError!void {
pub fn writeAll(self: File, bytes: []const u8) WriteError!void {
var index: usize = 0;
while (index < bytes.len) {
index += try self.write(bytes[index..]);
}
}
pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.pwrite(self.handle, bytes, offset);
} else {
return os.pwrite(self.handle, bytes, offset);
}
return os.pwrite(self.handle, bytes, offset);
}
pub fn writev(self: File, iovecs: []const os.iovec_const) WriteError!void {
pub fn pwriteAll(self: File, bytes: []const u8, offset: u64) PWriteError!void {
var index: usize = 0;
while (index < bytes.len) {
index += try self.pwrite(bytes[index..], offset + index);
}
}
pub fn writev(self: File, iovecs: []const os.iovec_const) WriteError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.writev(self.handle, iovecs);
} else {
return os.writev(self.handle, iovecs);
}
return os.writev(self.handle, iovecs);
}
pub fn pwritev(self: File, iovecs: []const os.iovec_const, offset: usize) WriteError!void {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.pwritev(self.handle, iovecs);
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
/// order to handle partial writes from the underlying OS layer.
pub fn writevAll(self: File, iovecs: []os.iovec_const) WriteError!void {
var i: usize = 0;
while (true) {
var amt = try self.writev(iovecs[i..]);
while (amt >= iovecs[i].iov_len) {
amt -= iovecs[i].iov_len;
i += 1;
if (i >= iovecs.len) return;
}
iovecs[i].iov_base += amt;
iovecs[i].iov_len -= amt;
}
}
pub fn pwritev(self: File, iovecs: []os.iovec_const, offset: usize) PWriteError!usize {
if (need_async_thread and self.io_mode == .blocking and !self.async_block_allowed) {
return std.event.Loop.instance.?.pwritev(self.handle, iovecs, offset);
} else {
return os.pwritev(self.handle, iovecs, offset);
}
}
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
/// order to handle partial writes from the underlying OS layer.
pub fn pwritevAll(self: File, iovecs: []os.iovec_const, offset: usize) PWriteError!void {
var i: usize = 0;
var off: usize = 0;
while (true) {
var amt = try self.pwritev(iovecs[i..], offset + off);
off += amt;
while (amt >= iovecs[i].iov_len) {
amt -= iovecs[i].iov_len;
i += 1;
if (i >= iovecs.len) return;
}
iovecs[i].iov_base += amt;
iovecs[i].iov_len -= amt;
}
return os.pwritev(self.handle, iovecs);
}
pub fn inStream(file: File) InStream {
@ -335,7 +441,7 @@ pub const File = struct {
pub const Error = WriteError;
pub const Stream = io.OutStream(Error);
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
const self = @fieldParentPtr(OutStream, "stream", out_stream);
return self.file.write(bytes);
}

View File

@ -437,10 +437,11 @@ pub fn BitInStream(endian: builtin.Endian, comptime Error: type) type {
};
}
/// This is a simple OutStream that writes to a fixed buffer, and returns an error
/// when it runs out of space.
/// This is a simple OutStream that writes to a fixed buffer. If the returned number
/// of bytes written is less than requested, the buffer is full.
/// Returns error.OutOfMemory when no bytes would be written.
pub const SliceOutStream = struct {
pub const Error = error{OutOfSpace};
pub const Error = error{OutOfMemory};
pub const Stream = OutStream(Error);
stream: Stream,
@ -464,9 +465,11 @@ pub const SliceOutStream = struct {
self.pos = 0;
}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
const self = @fieldParentPtr(SliceOutStream, "stream", out_stream);
if (bytes.len == 0) return 0;
assert(self.pos <= self.slice.len);
const n = if (self.pos + bytes.len <= self.slice.len)
@ -477,9 +480,9 @@ pub const SliceOutStream = struct {
std.mem.copy(u8, self.slice[self.pos .. self.pos + n], bytes[0..n]);
self.pos += n;
if (n < bytes.len) {
return Error.OutOfSpace;
}
if (n == 0) return error.OutOfMemory;
return n;
}
};
@ -508,7 +511,9 @@ pub const NullOutStream = struct {
};
}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
return bytes.len;
}
};
test "io.NullOutStream" {
@ -536,10 +541,11 @@ pub fn CountingOutStream(comptime OutStreamError: type) type {
};
}
fn writeFn(out_stream: *Stream, bytes: []const u8) OutStreamError!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) OutStreamError!usize {
const self = @fieldParentPtr(Self, "stream", out_stream);
try self.child_stream.write(bytes);
self.bytes_written += bytes.len;
return bytes.len;
}
};
}
@ -588,13 +594,14 @@ pub fn BufferedOutStreamCustom(comptime buffer_size: usize, comptime OutStreamEr
}
}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
const self = @fieldParentPtr(Self, "stream", out_stream);
if (bytes.len >= self.fifo.writableLength()) {
try self.flush();
return self.unbuffered_out_stream.write(bytes);
return self.unbuffered_out_stream.writeOnce(bytes);
}
self.fifo.writeAssumeCapacity(bytes);
return bytes.len;
}
};
}
@ -614,9 +621,10 @@ pub const BufferOutStream = struct {
};
}
fn writeFn(out_stream: *Stream, bytes: []const u8) !void {
fn writeFn(out_stream: *Stream, bytes: []const u8) !usize {
const self = @fieldParentPtr(BufferOutStream, "stream", out_stream);
return self.buffer.append(bytes);
try self.buffer.append(bytes);
return bytes.len;
}
};
@ -734,17 +742,17 @@ pub fn BitOutStream(endian: builtin.Endian, comptime Error: type) type {
self.bit_count = 0;
}
pub fn write(self_stream: *Stream, buffer: []const u8) Error!void {
pub fn write(self_stream: *Stream, buffer: []const u8) Error!usize {
var self = @fieldParentPtr(Self, "stream", self_stream);
//@NOTE: I'm not sure this is a good idea, maybe flushBits should be forced
// TODO: I'm not sure this is a good idea, maybe flushBits should be forced
if (self.bit_count > 0) {
for (buffer) |b, i|
try self.writeBits(b, u8_bit_count);
return;
return buffer.len;
}
return self.out_stream.write(buffer);
return self.out_stream.writeOnce(buffer);
}
};
}

View File

@ -20,10 +20,10 @@ pub const COutStream = struct {
};
}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
const self = @fieldParentPtr(COutStream, "stream", out_stream);
const amt_written = std.c.fwrite(bytes.ptr, 1, bytes.len, self.c_file);
if (amt_written == bytes.len) return;
if (amt_written >= 0) return amt_written;
switch (std.c._errno().*) {
0 => unreachable,
os.EINVAL => unreachable,

View File

@ -14,13 +14,13 @@ pub fn OutStream(comptime WriteError: type) type {
const Self = @This();
pub const Error = WriteError;
pub const WriteFn = if (std.io.is_async)
async fn (self: *Self, bytes: []const u8) Error!void
async fn (self: *Self, bytes: []const u8) Error!usize
else
fn (self: *Self, bytes: []const u8) Error!void;
fn (self: *Self, bytes: []const u8) Error!usize;
writeFn: WriteFn,
pub fn write(self: *Self, bytes: []const u8) Error!void {
pub fn writeOnce(self: *Self, bytes: []const u8) Error!usize {
if (std.io.is_async) {
// Let's not be writing 0xaa in safe modes for upwards of 4 MiB for every stream write.
@setRuntimeSafety(false);
@ -31,6 +31,13 @@ pub fn OutStream(comptime WriteError: type) type {
}
}
pub fn write(self: *Self, bytes: []const u8) Error!void {
var index: usize = 0;
while (index != bytes.len) {
index += try self.writeOnce(bytes[index..]);
}
}
pub fn print(self: *Self, comptime format: []const u8, args: var) Error!void {
return std.fmt.format(self, Error, write, format, args);
}

View File

@ -134,13 +134,13 @@ test "SliceOutStream" {
try ss.stream.write("world");
expect(mem.eql(u8, ss.getWritten(), "Helloworld"));
expectError(error.OutOfSpace, ss.stream.write("!"));
expectError(error.OutOfMemory, ss.stream.write("!"));
expect(mem.eql(u8, ss.getWritten(), "Helloworld"));
ss.reset();
expect(ss.getWritten().len == 0);
expectError(error.OutOfSpace, ss.stream.write("Hello world!"));
expectError(error.OutOfMemory, ss.stream.write("Hello world!"));
expect(mem.eql(u8, ss.getWritten(), "Hello worl"));
}
@ -617,7 +617,7 @@ test "File seek ops" {
fs.cwd().deleteFile(tmp_file_name) catch {};
}
try file.write(&([_]u8{0x55} ** 8192));
try file.writeAll(&([_]u8{0x55} ** 8192));
// Seek to the end
try file.seekFromEnd(0);

View File

@ -298,6 +298,11 @@ pub const ReadError = error{
/// buf.len. If 0 bytes were read, that means EOF.
/// If the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in error.WouldBlock.
///
/// Linux has a limit on how many bytes may be transferred in one `read` call, which is `0x7ffff000`
/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
/// well as stuffing the errno codes into the last `4096` values. This is noted on the `read` man page.
/// For POSIX the limit is `math.maxInt(isize)`.
pub fn read(fd: fd_t, buf: []u8) ReadError!usize {
if (builtin.os.tag == .windows) {
return windows.ReadFile(fd, buf, null);
@ -316,8 +321,15 @@ pub fn read(fd: fd_t, buf: []u8) ReadError!usize {
}
}
// Prevents EINVAL.
const max_count = switch (std.Target.current.os.tag) {
.linux => 0x7ffff000,
else => math.maxInt(isize),
};
const adjusted_len = math.min(max_count, buf.len);
while (true) {
const rc = system.read(fd, buf.ptr, buf.len);
const rc = system.read(fd, buf.ptr, adjusted_len);
switch (errno(rc)) {
0 => return @intCast(usize, rc),
EINTR => continue,
@ -352,32 +364,18 @@ pub fn read(fd: fd_t, buf: []u8) ReadError!usize {
/// * Windows
/// On these systems, the read races with concurrent writes to the same file descriptor.
pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize {
if (builtin.os.tag == .windows) {
// TODO batch these into parallel requests
var off: usize = 0;
var iov_i: usize = 0;
var inner_off: usize = 0;
while (true) {
const v = iov[iov_i];
const amt_read = try read(fd, v.iov_base[inner_off .. v.iov_len - inner_off]);
off += amt_read;
inner_off += amt_read;
if (inner_off == v.len) {
iov_i += 1;
inner_off = 0;
if (iov_i == iov.len) {
return off;
}
}
if (amt_read == 0) return off; // EOF
} else unreachable; // TODO https://github.com/ziglang/zig/issues/707
if (std.Target.current.os.tag == .windows) {
// TODO does Windows have a way to read an io vector?
if (iov.len == 0) return @as(usize, 0);
const first = iov[0];
return read(fd, first.iov_base[0..first.iov_len]);
}
while (true) {
// TODO handle the case when iov_len is too large and get rid of this @intCast
const rc = system.readv(fd, iov.ptr, @intCast(u32, iov.len));
const rc = system.readv(fd, iov.ptr, iov_count);
switch (errno(rc)) {
0 => return @bitCast(usize, rc),
0 => return @intCast(usize, rc),
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
@ -397,6 +395,8 @@ pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize {
}
}
pub const PReadError = ReadError || error{Unseekable};
/// Number of bytes read is returned. Upon reading end-of-file, zero is returned.
///
/// Retries when interrupted by a signal.
@ -405,7 +405,7 @@ pub fn readv(fd: fd_t, iov: []const iovec) ReadError!usize {
/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`.
/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are
/// used to perform the I/O. `error.WouldBlock` is not possible on Windows.
pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize {
pub fn pread(fd: fd_t, buf: []u8, offset: u64) PReadError!usize {
if (builtin.os.tag == .windows) {
return windows.ReadFile(fd, buf, offset);
}
@ -429,6 +429,9 @@ pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize {
ENOBUFS => return error.SystemResources,
ENOMEM => return error.SystemResources,
ECONNRESET => return error.ConnectionResetByPeer,
ENXIO => return error.Unseekable,
ESPIPE => return error.Unseekable,
EOVERFLOW => return error.Unseekable,
else => |err| return unexpectedErrno(err),
}
}
@ -448,75 +451,23 @@ pub fn pread(fd: fd_t, buf: []u8, offset: u64) ReadError!usize {
/// * Darwin
/// * Windows
/// On these systems, the read races with concurrent writes to the same file descriptor.
pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) ReadError!usize {
if (comptime std.Target.current.isDarwin()) {
// Darwin does not have preadv but it does have pread.
var off: usize = 0;
var iov_i: usize = 0;
var inner_off: usize = 0;
while (true) {
const v = iov[iov_i];
const rc = darwin.pread(fd, v.iov_base + inner_off, v.iov_len - inner_off, offset + off);
const err = darwin.getErrno(rc);
switch (err) {
0 => {
const amt_read = @bitCast(usize, rc);
off += amt_read;
inner_off += amt_read;
if (inner_off == v.iov_len) {
iov_i += 1;
inner_off = 0;
if (iov_i == iov.len) {
return off;
}
}
if (rc == 0) return off; // EOF
continue;
},
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
ESPIPE => unreachable, // fd is not seekable
EAGAIN => if (std.event.Loop.instance) |loop| {
loop.waitUntilFdReadable(fd);
continue;
} else {
return error.WouldBlock;
},
EBADF => unreachable, // always a race condition
EIO => return error.InputOutput,
EISDIR => return error.IsDir,
ENOBUFS => return error.SystemResources,
ENOMEM => return error.SystemResources,
else => return unexpectedErrno(err),
}
}
pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) PReadError!usize {
const have_pread_but_not_preadv = switch (std.Target.current.os.tag) {
.windows, .macosx, .ios, .watchos, .tvos => true,
else => false,
};
if (have_pread_but_not_preadv) {
// We could loop here; but proper usage of `preadv` must handle partial reads anyway.
// So we simply read into the first vector only.
if (iov.len == 0) return @as(usize, 0);
const first = iov[0];
return pread(fd, first.iov_base[0..first.iov_len], offset);
}
if (builtin.os.tag == .windows) {
// TODO batch these into parallel requests
var off: usize = 0;
var iov_i: usize = 0;
var inner_off: usize = 0;
while (true) {
const v = iov[iov_i];
const amt_read = try pread(fd, v.iov_base[inner_off .. v.iov_len - inner_off], offset + off);
off += amt_read;
inner_off += amt_read;
if (inner_off == v.len) {
iov_i += 1;
inner_off = 0;
if (iov_i == iov.len) {
return off;
}
}
if (amt_read == 0) return off; // EOF
} else unreachable; // TODO https://github.com/ziglang/zig/issues/707
}
const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31);
while (true) {
// TODO handle the case when iov_len is too large and get rid of this @intCast
const rc = system.preadv(fd, iov.ptr, @intCast(u32, iov.len), offset);
const rc = system.preadv(fd, iov.ptr, iov_count, offset);
switch (errno(rc)) {
0 => return @bitCast(usize, rc),
EINTR => continue,
@ -533,6 +484,9 @@ pub fn preadv(fd: fd_t, iov: []const iovec, offset: u64) ReadError!usize {
EISDIR => return error.IsDir,
ENOBUFS => return error.SystemResources,
ENOMEM => return error.SystemResources,
ENXIO => return error.Unseekable,
ESPIPE => return error.Unseekable,
EOVERFLOW => return error.Unseekable,
else => |err| return unexpectedErrno(err),
}
}
@ -553,10 +507,28 @@ pub const WriteError = error{
WouldBlock,
} || UnexpectedError;
/// Write to a file descriptor. Keeps trying if it gets interrupted.
/// If the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in error.WouldBlock.
pub fn write(fd: fd_t, bytes: []const u8) WriteError!void {
/// Write to a file descriptor.
/// Retries when interrupted by a signal.
/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero.
///
/// Note that a successful write() may transfer fewer than count bytes. Such partial writes can
/// occur for various reasons; for example, because there was insufficient space on the disk
/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or
/// similar was interrupted by a signal handler after it had transferred some, but before it had
/// transferred all of the requested bytes. In the event of a partial write, the caller can make
/// another write() call to transfer the remaining bytes. The subsequent call will either
/// transfer further bytes or may result in an error (e.g., if the disk is now full).
///
/// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`.
/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are
/// used to perform the I/O. `error.WouldBlock` is not possible on Windows.
///
/// Linux has a limit on how many bytes may be transferred in one `write` call, which is `0x7ffff000`
/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
/// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page.
/// The corresponding POSIX limit is `math.maxInt(isize)`.
pub fn write(fd: fd_t, bytes: []const u8) WriteError!usize {
if (builtin.os.tag == .windows) {
return windows.WriteFile(fd, bytes, null);
}
@ -568,26 +540,21 @@ pub fn write(fd: fd_t, bytes: []const u8) WriteError!void {
}};
var nwritten: usize = undefined;
switch (wasi.fd_write(fd, &ciovs, ciovs.len, &nwritten)) {
0 => return,
0 => return nwritten,
else => |err| return unexpectedErrno(err),
}
}
// Linux can return EINVAL when write amount is > 0x7ffff000
// See https://github.com/ziglang/zig/pull/743#issuecomment-363165856
// TODO audit this. Shawn Landden says that this is not actually true.
// if this logic should stay, move it to std.os.linux
const max_bytes_len = 0x7ffff000;
const max_count = switch (std.Target.current.os.tag) {
.linux => 0x7ffff000,
else => math.maxInt(isize),
};
const adjusted_len = math.min(max_count, bytes.len);
var index: usize = 0;
while (index < bytes.len) {
const amt_to_write = math.min(bytes.len - index, @as(usize, max_bytes_len));
const rc = system.write(fd, bytes.ptr + index, amt_to_write);
while (true) {
const rc = system.write(fd, bytes.ptr, adjusted_len);
switch (errno(rc)) {
0 => {
index += @intCast(usize, rc);
continue;
},
0 => return @intCast(usize, rc),
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
@ -611,14 +578,36 @@ pub fn write(fd: fd_t, bytes: []const u8) WriteError!void {
}
/// Write multiple buffers to a file descriptor.
/// If the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in error.WouldBlock.
pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!void {
/// Retries when interrupted by a signal.
/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero.
///
/// Note that a successful write() may transfer fewer bytes than supplied. Such partial writes can
/// occur for various reasons; for example, because there was insufficient space on the disk
/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or
/// similar was interrupted by a signal handler after it had transferred some, but before it had
/// transferred all of the requested bytes. In the event of a partial write, the caller can make
/// another write() call to transfer the remaining bytes. The subsequent call will either
/// transfer further bytes or may result in an error (e.g., if the disk is now full).
///
/// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`.
/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are
/// used to perform the I/O. `error.WouldBlock` is not possible on Windows.
///
/// If `iov.len` is larger than will fit in a `u31`, a partial write will occur.
pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!usize {
if (std.Target.current.os.tag == .windows) {
// TODO does Windows have a way to write an io vector?
if (iov.len == 0) return @as(usize, 0);
const first = iov[0];
return write(fd, first.iov_base[0..first.iov_len]);
}
const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31);
while (true) {
// TODO handle the case when iov_len is too large and get rid of this @intCast
const rc = system.writev(fd, iov.ptr, @intCast(u32, iov.len));
const rc = system.writev(fd, iov.ptr, iov_count);
switch (errno(rc)) {
0 => return,
0 => return @intCast(usize, rc),
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
@ -641,23 +630,45 @@ pub fn writev(fd: fd_t, iov: []const iovec_const) WriteError!void {
}
}
pub const PWriteError = WriteError || error{Unseekable};
/// Write to a file descriptor, with a position offset.
///
/// Retries when interrupted by a signal.
/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero.
///
/// Note that a successful write() may transfer fewer bytes than supplied. Such partial writes can
/// occur for various reasons; for example, because there was insufficient space on the disk
/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or
/// similar was interrupted by a signal handler after it had transferred some, but before it had
/// transferred all of the requested bytes. In the event of a partial write, the caller can make
/// another write() call to transfer the remaining bytes. The subsequent call will either
/// transfer further bytes or may result in an error (e.g., if the disk is now full).
///
/// For POSIX systems, if the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`.
/// On Windows, if the application has a global event loop enabled, I/O Completion Ports are
/// used to perform the I/O. `error.WouldBlock` is not possible on Windows.
pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) WriteError!void {
///
/// Linux has a limit on how many bytes may be transferred in one `pwrite` call, which is `0x7ffff000`
/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
/// well as stuffing the errno codes into the last `4096` values. This is noted on the `write` man page.
/// The corresponding POSIX limit is `math.maxInt(isize)`.
pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) PWriteError!usize {
if (std.Target.current.os.tag == .windows) {
return windows.WriteFile(fd, bytes, offset);
}
// Prevent EINVAL.
const max_count = switch (std.Target.current.os.tag) {
.linux => 0x7ffff000,
else => math.maxInt(isize),
};
const adjusted_len = math.min(max_count, bytes.len);
while (true) {
const rc = system.pwrite(fd, bytes.ptr, bytes.len, offset);
const rc = system.pwrite(fd, bytes.ptr, adjusted_len, offset);
switch (errno(rc)) {
0 => return,
0 => return @intCast(usize, rc),
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
@ -675,84 +686,54 @@ pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) WriteError!void {
ENOSPC => return error.NoSpaceLeft,
EPERM => return error.AccessDenied,
EPIPE => return error.BrokenPipe,
ENXIO => return error.Unseekable,
ESPIPE => return error.Unseekable,
EOVERFLOW => return error.Unseekable,
else => |err| return unexpectedErrno(err),
}
}
}
/// Write multiple buffers to a file descriptor, with a position offset.
///
/// Retries when interrupted by a signal.
/// Returns the number of bytes written. If nonzero bytes were supplied, this will be nonzero.
///
/// Note that a successful write() may transfer fewer than count bytes. Such partial writes can
/// occur for various reasons; for example, because there was insufficient space on the disk
/// device to write all of the requested bytes, or because a blocked write() to a socket, pipe, or
/// similar was interrupted by a signal handler after it had transferred some, but before it had
/// transferred all of the requested bytes. In the event of a partial write, the caller can make
/// another write() call to transfer the remaining bytes. The subsequent call will either
/// transfer further bytes or may result in an error (e.g., if the disk is now full).
///
/// If the application has a global event loop enabled, EAGAIN is handled
/// via the event loop. Otherwise EAGAIN results in `error.WouldBlock`.
///
/// This operation is non-atomic on the following systems:
/// The following systems do not have this syscall, and will return partial writes if more than one
/// vector is provided:
/// * Darwin
/// * Windows
/// On these systems, the write races with concurrent writes to the same file descriptor, and
/// the file can be in a partially written state when an error occurs.
pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) WriteError!void {
if (comptime std.Target.current.isDarwin()) {
// Darwin does not have pwritev but it does have pwrite.
var off: usize = 0;
var iov_i: usize = 0;
var inner_off: usize = 0;
while (true) {
const v = iov[iov_i];
const rc = darwin.pwrite(fd, v.iov_base + inner_off, v.iov_len - inner_off, offset + off);
const err = darwin.getErrno(rc);
switch (err) {
0 => {
const amt_written = @bitCast(usize, rc);
off += amt_written;
inner_off += amt_written;
if (inner_off == v.iov_len) {
iov_i += 1;
inner_off = 0;
if (iov_i == iov.len) {
return;
}
}
continue;
},
EINTR => continue,
ESPIPE => unreachable, // `fd` is not seekable.
EINVAL => unreachable,
EFAULT => unreachable,
EAGAIN => if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(fd);
continue;
} else {
return error.WouldBlock;
},
EBADF => unreachable, // Always a race condition.
EDESTADDRREQ => unreachable, // `connect` was never called.
EDQUOT => return error.DiskQuota,
EFBIG => return error.FileTooBig,
EIO => return error.InputOutput,
ENOSPC => return error.NoSpaceLeft,
EPERM => return error.AccessDenied,
EPIPE => return error.BrokenPipe,
else => return unexpectedErrno(err),
}
}
}
if (std.Target.current.os.tag == .windows) {
var off = offset;
for (iov) |item| {
try pwrite(fd, item.iov_base[0..item.iov_len], off);
off += buf.len;
}
return;
///
/// If `iov.len` is larger than will fit in a `u31`, a partial write will occur.
pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) PWriteError!usize {
const have_pwrite_but_not_pwritev = switch (std.Target.current.os.tag) {
.windows, .macosx, .ios, .watchos, .tvos => true,
else => false,
};
if (have_pwrite_but_not_pwritev) {
// We could loop here; but proper usage of `pwritev` must handle partial writes anyway.
// So we simply write the first vector only.
if (iov.len == 0) return @as(usize, 0);
const first = iov[0];
return pwrite(fd, first.iov_base[0..first.iov_len], offset);
}
const iov_count = math.cast(u31, iov.len) catch math.maxInt(u31);
while (true) {
// TODO handle the case when iov_len is too large and get rid of this @intCast
const rc = system.pwritev(fd, iov.ptr, @intCast(u32, iov.len), offset);
const rc = system.pwritev(fd, iov.ptr, iov_count, offset);
switch (errno(rc)) {
0 => return,
0 => return @intCast(usize, rc),
EINTR => continue,
EINVAL => unreachable,
EFAULT => unreachable,
@ -770,6 +751,9 @@ pub fn pwritev(fd: fd_t, iov: []const iovec_const, offset: u64) WriteError!void
ENOSPC => return error.NoSpaceLeft,
EPERM => return error.AccessDenied,
EPIPE => return error.BrokenPipe,
ENXIO => return error.Unseekable,
ESPIPE => return error.Unseekable,
EOVERFLOW => return error.Unseekable,
else => |err| return unexpectedErrno(err),
}
}
@ -3389,7 +3373,6 @@ pub const SendError = error{
/// The socket type requires that message be sent atomically, and the size of the message
/// to be sent made this impossible. The message is not transmitted.
///
MessageTooBig,
/// The output queue for a network interface was full. This generally indicates that the
@ -3498,119 +3481,312 @@ pub fn send(
return sendto(sockfd, buf, flags, null, 0);
}
pub const SendFileError = error{
/// There was an unspecified error while reading from infd.
InputOutput,
pub const SendFileError = PReadError || WriteError || SendError;
/// There was insufficient resources for processing.
SystemResources,
/// The value provided for count overflows the maximum size of either
/// infd or outfd.
Overflow,
/// Offset was provided, but infd is not seekable.
Unseekable,
/// The outfd is marked nonblocking and the requested operation would block, and
/// there is no global event loop configured.
WouldBlock,
} || WriteError || UnexpectedError;
pub const sf_hdtr = struct {
headers: []iovec_const,
trailers: []iovec_const,
};
/// Transfer data between file descriptors.
///
/// The `sendfile` call copies `count` bytes from one file descriptor to another within the kernel. This can
/// be more performant than transferring data from the kernel to user space and back, such as with
/// `read` and `write` calls.
///
/// The `infd` should be a file descriptor opened for reading, and `outfd` should be a file descriptor
/// opened for writing. Copying will begin at `offset`, if not null, which will be updated to reflect
/// the number of bytes read. If `offset` is null, the copying will begin at the current seek position,
/// and the file position will be updated.
pub fn sendfile(infd: fd_t, outfd: fd_t, offset: u64, count: usize, optional_hdtr: ?*const sf_hdtr, flags: u32) SendFileError!usize {
// XXX: check if offset is > length of file, return 0 bytes written
// XXX: document systems where headers are sent atomically.
// XXX: compute new offset on EINTR/EAGAIN
var rc: usize = undefined;
var err: usize = undefined;
if (builtin.os == .linux) {
while (true) {
try lseek_SET(infd, offset);
if (optional_hdtr) |hdtr| {
try writev(outfd, hdtr.headers);
}
rc = system.sendfile(outfd, infd, null, count);
err = errno(rc);
if (optional_hdtr) |hdtr| {
try writev(outfd, hdtr.trailers);
}
switch (err) {
0 => return @intCast(usize, rc),
else => return unexpectedErrno(err),
EBADF => unreachable,
EINVAL => unreachable,
EFAULT => unreachable,
EAGAIN => if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(outfd);
continue;
} else {
return error.WouldBlock;
},
EIO => return error.InputOutput,
ENOMEM => return error.SystemResources,
EOVERFLOW => return error.Overflow,
ESPIPE => return error.Unseekable,
}
}
} else if (builtin.os == .freebsd) {
while (true) {
var rcount: u64 = 0;
var hdtr: std.c.sf_hdtr = undefined;
if (optional_hdtr) |h| {
hdtr = std.c.sf_hdtr{
.headers = h.headers.ptr,
.hdr_cnt = @intCast(c_int, h.headers.len),
.trailers = h.trailers.ptr,
.trl_cnt = @intCast(c_int, h.trailers.len),
};
}
err = errno(system.sendfile(infd, outfd, offset, count, &hdtr, &rcount, @intCast(c_int, flags)));
switch (err) {
0 => return @intCast(usize, rcount),
else => return unexpectedErrno(err),
EBADF => unreachable,
EFAULT => unreachable,
EINVAL => unreachable,
ENOTCAPABLE => unreachable,
ENOTCONN => unreachable,
ENOTSOCK => unreachable,
EAGAIN => if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(outfd);
continue;
} else {
return error.WouldBlock;
},
EBUSY => return error.DeviceBusy,
EINTR => continue,
EIO => return error.InputOutput,
ENOBUFS => return error.SystemResources,
EPIPE => return error.BrokenPipe,
}
}
} else {
@compileError("sendfile unimplemented for this target");
fn count_iovec_bytes(iovs: []const iovec_const) usize {
var count: usize = 0;
for (iovs) |iov| {
count += iov.iov_len;
}
return count;
}
/// Transfer data between file descriptors, with optional headers and trailers.
/// Returns the number of bytes written. This will be zero if `in_offset` falls beyond the end of the file.
///
/// The `sendfile` call copies `count` bytes from one file descriptor to another. When possible,
/// this is done within the operating system kernel, which can provide better performance
/// characteristics than transferring data from kernel to user space and back, such as with
/// `read` and `write` calls. When `count` is `0`, it means to copy until the end of the input file has been
/// reached. Note, however, that partial writes are still possible in this case.
///
/// `in_fd` must be a file descriptor opened for reading, and `out_fd` must be a file descriptor
/// opened for writing. They may be any kind of file descriptor; however, if `in_fd` is not a regular
/// file system file, it may cause this function to fall back to calling `read` and `write`, in which case
/// atomicity guarantees no longer apply.
///
/// Copying begins reading at `in_offset`. The input file descriptor seek position is ignored and not updated.
/// If the output file descriptor has a seek position, it is updated as bytes are written.
///
/// `flags` has different meanings per operating system; refer to the respective man pages.
///
/// These systems support atomically sending everything, including headers and trailers:
/// * macOS
/// * FreeBSD
///
/// These systems support in-kernel data copying, but headers and trailers are not sent atomically:
/// * Linux
///
/// Other systems fall back to calling `read` / `write`.
///
/// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000`
/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
/// well as stuffing the errno codes into the last `4096` values. This is cited on the `sendfile` man page.
/// The corresponding POSIX limit on this is `math.maxInt(isize)`.
pub fn sendfile(
out_fd: fd_t,
in_fd: fd_t,
in_offset: u64,
count: usize,
headers: []const iovec_const,
trailers: []const iovec_const,
flags: u32,
) SendFileError!usize {
var header_done = false;
var total_written: usize = 0;
// Prevents EOVERFLOW.
const max_count = switch (std.Target.current.os.tag) {
.linux => 0x7ffff000,
else => math.maxInt(isize),
};
switch (std.Target.current.os.tag) {
.linux => sf: {
// sendfile() first appeared in Linux 2.2, glibc 2.1.
const call_sf = comptime if (builtin.link_libc)
std.c.versionCheck(.{ .major = 2, .minor = 1 }).ok
else
std.Target.current.os.version_range.linux.range.max.order(.{ .major = 2, .minor = 2 }) != .lt;
if (!call_sf) break :sf;
if (headers.len != 0) {
const amt = try writev(out_fd, headers);
total_written += amt;
if (amt < count_iovec_bytes(headers)) return total_written;
header_done = true;
}
// Here we match BSD behavior, making a zero count value send as many bytes as possible.
const adjusted_count = if (count == 0) max_count else math.min(count, max_count);
while (true) {
var offset: off_t = @bitCast(off_t, in_offset);
const rc = system.sendfile(out_fd, in_fd, &offset, adjusted_count);
switch (errno(rc)) {
0 => {
const amt = @bitCast(usize, rc);
total_written += amt;
if (count == 0 and amt == 0) {
// We have detected EOF from `in_fd`.
break;
} else if (amt < count) {
return total_written;
} else {
break;
}
},
EBADF => unreachable, // Always a race condition.
EFAULT => unreachable, // Segmentation fault.
EOVERFLOW => unreachable, // We avoid passing too large of a `count`.
ENOTCONN => unreachable, // `out_fd` is an unconnected socket.
EINVAL, ENOSYS => {
// EINVAL could be any of the following situations:
// * Descriptor is not valid or locked
// * an mmap(2)-like operation is not available for in_fd
// * count is negative
// * out_fd has the O_APPEND flag set
// Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write
// manually, the same as ENOSYS.
break :sf;
},
EAGAIN => if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(out_fd);
continue;
} else {
return error.WouldBlock;
},
EIO => return error.InputOutput,
EPIPE => return error.BrokenPipe,
ENOMEM => return error.SystemResources,
ENXIO => return error.Unseekable,
ESPIPE => return error.Unseekable,
else => |err| {
const discard = unexpectedErrno(err);
break :sf;
},
}
}
if (trailers.len != 0) {
total_written += try writev(out_fd, trailers);
}
return total_written;
},
.freebsd => sf: {
var hdtr_data: std.c.sf_hdtr = undefined;
var hdtr: ?*std.c.sf_hdtr = null;
if (headers.len != 0 or trailers.len != 0) {
// Here we carefully avoid `@intCast` by returning partial writes when
// too many io vectors are provided.
const hdr_cnt = math.cast(u31, headers.len) catch math.maxInt(u31);
if (headers.len > hdr_cnt) return writev(out_fd, headers);
const trl_cnt = math.cast(u31, trailers.len) catch math.maxInt(u31);
hdtr_data = std.c.sf_hdtr{
.headers = headers.ptr,
.hdr_cnt = hdr_cnt,
.trailers = trailers.ptr,
.trl_cnt = trl_cnt,
};
hdtr = &hdtr_data;
}
const adjusted_count = math.min(count, max_count);
while (true) {
var sbytes: off_t = undefined;
const err = errno(system.sendfile(out_fd, in_fd, in_offset, adjusted_count, hdtr, &sbytes, flags));
const amt = @bitCast(usize, sbytes);
switch (err) {
0 => return amt,
EBADF => unreachable, // Always a race condition.
EFAULT => unreachable, // Segmentation fault.
ENOTCONN => unreachable, // `out_fd` is an unconnected socket.
EINVAL, EOPNOTSUPP, ENOTSOCK, ENOSYS => {
// EINVAL could be any of the following situations:
// * The fd argument is not a regular file.
// * The s argument is not a SOCK_STREAM type socket.
// * The offset argument is negative.
// Because of some of these possibilities, we fall back to doing read/write
// manually, the same as ENOSYS.
break :sf;
},
EINTR => if (amt != 0) return amt else continue,
EAGAIN => if (amt != 0) {
return amt;
} else if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(out_fd);
continue;
} else {
return error.WouldBlock;
},
EBUSY => if (amt != 0) {
return amt;
} else if (std.event.Loop.instance) |loop| {
loop.waitUntilFdReadable(in_fd);
continue;
} else {
return error.WouldBlock;
},
EIO => return error.InputOutput,
ENOBUFS => return error.SystemResources,
EPIPE => return error.BrokenPipe,
else => {
const discard = unexpectedErrno(err);
if (amt != 0) {
return amt;
} else {
break :sf;
}
},
}
}
},
.macosx, .ios, .tvos, .watchos => sf: {
var hdtr_data: std.c.sf_hdtr = undefined;
var hdtr: ?*std.c.sf_hdtr = null;
if (headers.len != 0 or trailers.len != 0) {
// Here we carefully avoid `@intCast` by returning partial writes when
// too many io vectors are provided.
const hdr_cnt = math.cast(u31, headers.len) catch math.maxInt(u31);
if (headers.len > hdr_cnt) return writev(out_fd, headers);
const trl_cnt = math.cast(u31, trailers.len) catch math.maxInt(u31);
hdtr_data = std.c.sf_hdtr{
.headers = headers.ptr,
.hdr_cnt = hdr_cnt,
.trailers = trailers.ptr,
.trl_cnt = trl_cnt,
};
hdtr = &hdtr_data;
}
const adjusted_count = math.min(count, max_count);
while (true) {
var sbytes: off_t = adjusted_count;
const err = errno(system.sendfile(out_fd, in_fd, in_offset, &sbytes, hdtr, flags));
const amt = @bitCast(usize, sbytes);
switch (err) {
0 => return amt,
EBADF => unreachable, // Always a race condition.
EFAULT => unreachable, // Segmentation fault.
EINVAL => unreachable,
ENOTCONN => unreachable, // `out_fd` is an unconnected socket.
ENOTSUP, ENOTSOCK, ENOSYS => break :sf,
EINTR => if (amt != 0) return amt else continue,
EAGAIN => if (amt != 0) {
return amt;
} else if (std.event.Loop.instance) |loop| {
loop.waitUntilFdWritable(out_fd);
continue;
} else {
return error.WouldBlock;
},
EIO => return error.InputOutput,
EPIPE => return error.BrokenPipe,
else => {
_ = unexpectedErrno(err);
if (amt != 0) {
return amt;
} else {
break :sf;
}
},
}
}
},
else => {}, // fall back to read/write
}
if (headers.len != 0 and !header_done) {
const amt = try writev(out_fd, headers);
total_written += amt;
if (amt < count_iovec_bytes(headers)) return total_written;
}
rw: {
var buf: [8 * 4096]u8 = undefined;
// Here we match BSD behavior, making a zero count value send as many bytes as possible.
const adjusted_count = if (count == 0) buf.len else math.min(buf.len, count);
const amt_read = try pread(in_fd, buf[0..adjusted_count], in_offset);
if (amt_read == 0) {
if (count == 0) {
// We have detected EOF from `in_fd`.
break :rw;
} else {
return total_written;
}
}
const amt_written = try write(out_fd, buf[0..amt_read]);
total_written += amt_written;
if (amt_written < count or count == 0) return total_written;
}
if (trailers.len != 0) {
total_written += try writev(out_fd, trailers);
}
return total_written;
}
pub const PollError = error{

View File

@ -82,6 +82,7 @@ pub const SYS_pread64 = 67;
pub const SYS_pwrite64 = 68;
pub const SYS_preadv = 69;
pub const SYS_pwritev = 70;
pub const SYS_sendfile = 71;
pub const SYS_pselect6 = 72;
pub const SYS_ppoll = 73;
pub const SYS_signalfd4 = 74;

View File

@ -316,8 +316,19 @@ pub fn symlinkat(existing: [*:0]const u8, newfd: i32, newpath: [*:0]const u8) us
return syscall3(SYS_symlinkat, @ptrToInt(existing), @bitCast(usize, @as(isize, newfd)), @ptrToInt(newpath));
}
pub fn pread(fd: i32, buf: [*]u8, count: usize, offset: usize) usize {
return syscall4(SYS_pread, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), count, offset);
pub fn pread(fd: i32, buf: [*]u8, count: usize, offset: u64) usize {
if (@hasDecl(@This(), "SYS_pread64")) {
return syscall5(
SYS_pread64,
@bitCast(usize, @as(isize, fd)),
@ptrToInt(buf),
count,
@truncate(usize, offset),
@truncate(usize, offset >> 32),
);
} else {
return syscall4(SYS_pread, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), count, offset);
}
}
pub fn access(path: [*:0]const u8, mode: u32) usize {
@ -846,11 +857,23 @@ pub fn sendto(fd: i32, buf: [*]const u8, len: usize, flags: u32, addr: ?*const s
return syscall6(SYS_sendto, @bitCast(usize, @as(isize, fd)), @ptrToInt(buf), len, flags, @ptrToInt(addr), @intCast(usize, alen));
}
pub fn sendfile(outfd: i32, infd: i32, offset: ?*u64, count: usize) usize {
pub fn sendfile(outfd: i32, infd: i32, offset: ?*i64, count: usize) usize {
if (@hasDecl(@This(), "SYS_sendfile64")) {
return syscall4(SYS_sendfile64, @bitCast(usize, @as(isize, outfd)), @bitCast(usize, @as(isize, infd)), @ptrToInt(offset), count);
return syscall4(
SYS_sendfile64,
@bitCast(usize, @as(isize, outfd)),
@bitCast(usize, @as(isize, infd)),
@ptrToInt(offset),
count,
);
} else {
return syscall4(SYS_sendfile, @bitCast(usize, @as(isize, outfd)), @bitCast(usize, @as(isize, infd)), @ptrToInt(offset), count);
return syscall4(
SYS_sendfile,
@bitCast(usize, @as(isize, outfd)),
@bitCast(usize, @as(isize, infd)),
@ptrToInt(offset),
count,
);
}
}

View File

@ -44,6 +44,114 @@ fn testThreadIdFn(thread_id: *Thread.Id) void {
thread_id.* = Thread.getCurrentId();
}
test "sendfile" {
try fs.makePath(a, "os_test_tmp");
defer fs.deleteTree("os_test_tmp") catch {};
var dir = try fs.cwd().openDirList("os_test_tmp");
defer dir.close();
const line1 = "line1\n";
const line2 = "second line\n";
var vecs = [_]os.iovec_const{
.{
.iov_base = line1,
.iov_len = line1.len,
},
.{
.iov_base = line2,
.iov_len = line2.len,
},
};
var src_file = try dir.createFileC("sendfile1.txt", .{ .read = true });
defer src_file.close();
try src_file.writevAll(&vecs);
var dest_file = try dir.createFileC("sendfile2.txt", .{ .read = true });
defer dest_file.close();
const header1 = "header1\n";
const header2 = "second header\n";
var headers = [_]os.iovec_const{
.{
.iov_base = header1,
.iov_len = header1.len,
},
.{
.iov_base = header2,
.iov_len = header2.len,
},
};
const trailer1 = "trailer1\n";
const trailer2 = "second trailer\n";
var trailers = [_]os.iovec_const{
.{
.iov_base = trailer1,
.iov_len = trailer1.len,
},
.{
.iov_base = trailer2,
.iov_len = trailer2.len,
},
};
var written_buf: [header1.len + header2.len + 10 + trailer1.len + trailer2.len]u8 = undefined;
try sendfileAll(dest_file.handle, src_file.handle, 1, 10, &headers, &trailers, 0);
try dest_file.preadAll(&written_buf, 0);
expect(mem.eql(u8, &written_buf, "header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n"));
}
fn sendfileAll(
out_fd: os.fd_t,
in_fd: os.fd_t,
offset: u64,
count: usize,
headers: []os.iovec_const,
trailers: []os.iovec_const,
flags: u32,
) os.SendFileError!void {
var amt: usize = undefined;
hdrs: {
var i: usize = 0;
while (i < headers.len) {
amt = try os.sendfile(out_fd, in_fd, offset, count, headers[i..], trailers, flags);
while (amt >= headers[i].iov_len) {
amt -= headers[i].iov_len;
i += 1;
if (i >= headers.len) break :hdrs;
}
headers[i].iov_base += amt;
headers[i].iov_len -= amt;
}
}
var off = amt;
while (off < count) {
amt = try os.sendfile(out_fd, in_fd, offset + off, count - off, &[0]os.iovec_const{}, trailers, flags);
off += amt;
}
amt = off - count;
var i: usize = 0;
while (i < trailers.len) {
while (amt >= headers[i].iov_len) {
amt -= trailers[i].iov_len;
i += 1;
if (i >= trailers.len) return;
}
trailers[i].iov_base += amt;
trailers[i].iov_len -= amt;
if (std.Target.current.os.tag == .windows) {
amt = try os.writev(out_fd, trailers[i..]);
} else {
// Here we must use send because it's the only way to give the flags.
amt = try os.send(out_fd, trailers[i].iov_base[0..trailers[i].iov_len], flags);
}
}
}
test "std.Thread.getCurrentId" {
if (builtin.single_threaded) return error.SkipZigTest;
@ -103,7 +211,7 @@ test "AtomicFile" {
{
var af = try fs.AtomicFile.init(test_out_file, File.default_mode);
defer af.deinit();
try af.file.write(test_content);
try af.file.writeAll(test_content);
try af.finish();
}
const content = try io.readFileAlloc(testing.allocator, test_out_file);
@ -226,7 +334,7 @@ test "pipe" {
return error.SkipZigTest;
var fds = try os.pipe();
try os.write(fds[1], "hello");
expect((try os.write(fds[1], "hello")) == 5);
var buf: [16]u8 = undefined;
expect((try os.read(fds[0], buf[0..])) == 5);
testing.expectEqualSlices(u8, buf[0..5], "hello");
@ -248,7 +356,7 @@ test "memfd_create" {
else => |e| return e,
};
defer std.os.close(fd);
try std.os.write(fd, "test");
expect((try std.os.write(fd, "test")) == 4);
try std.os.lseek_SET(fd, 0);
var buf: [10]u8 = undefined;

View File

@ -424,7 +424,7 @@ pub const WriteFileError = error{
Unexpected,
};
pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError!void {
pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError!usize {
if (std.event.Loop.instance) |loop| {
// TODO support async WriteFile with no offset
const off = offset.?;
@ -445,8 +445,8 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError
_ = CreateIoCompletionPort(fd, loop.os_data.io_port, undefined, undefined);
loop.beginOneEvent();
suspend {
// TODO replace this @intCast with a loop that writes all the bytes
_ = kernel32.WriteFile(fd, bytes.ptr, @intCast(windows.DWORD, bytes.len), null, &resume_node.base.overlapped);
const adjusted_len = math.cast(windows.DWORD, bytes.len) catch maxInt(windows.DWORD);
_ = kernel32.WriteFile(fd, bytes.ptr, adjusted_len, null, &resume_node.base.overlapped);
}
var bytes_transferred: windows.DWORD = undefined;
if (kernel32.GetOverlappedResult(fd, &resume_node.base.overlapped, &bytes_transferred, FALSE) == 0) {
@ -460,6 +460,7 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError
else => |err| return windows.unexpectedError(err),
}
}
return bytes_transferred;
} else {
var bytes_written: DWORD = undefined;
var overlapped_data: OVERLAPPED = undefined;
@ -473,18 +474,19 @@ pub fn WriteFile(handle: HANDLE, bytes: []const u8, offset: ?u64) WriteFileError
};
break :blk &overlapped_data;
} else null;
// TODO replace this @intCast with a loop that writes all the bytes
if (kernel32.WriteFile(handle, bytes.ptr, @intCast(u32, bytes.len), &bytes_written, overlapped) == 0) {
const adjusted_len = math.cast(u32, bytes.len) catch maxInt(u32);
if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) {
switch (kernel32.GetLastError()) {
.INVALID_USER_BUFFER => return error.SystemResources,
.NOT_ENOUGH_MEMORY => return error.SystemResources,
.OPERATION_ABORTED => return error.OperationAborted,
.NOT_ENOUGH_QUOTA => return error.SystemResources,
.IO_PENDING => unreachable, // this function is for blocking files only
.IO_PENDING => unreachable,
.BROKEN_PIPE => return error.BrokenPipe,
else => |err| return unexpectedError(err),
}
}
return bytes_written;
}
}

View File

@ -29,7 +29,7 @@ pub fn render(allocator: *mem.Allocator, stream: var, tree: *ast.Tree) (@TypeOf(
source_index: usize,
source: []const u8,
fn write(iface_stream: *Stream, bytes: []const u8) StreamError!void {
fn write(iface_stream: *Stream, bytes: []const u8) StreamError!usize {
const self = @fieldParentPtr(MyStream, "stream", iface_stream);
if (!self.anything_changed_ptr.*) {
@ -45,7 +45,7 @@ pub fn render(allocator: *mem.Allocator, stream: var, tree: *ast.Tree) (@TypeOf(
}
}
try self.child_stream.write(bytes);
return self.child_stream.writeOnce(bytes);
}
};
var my_stream = MyStream{
@ -2443,14 +2443,15 @@ const FindByteOutStream = struct {
};
}
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!void {
fn writeFn(out_stream: *Stream, bytes: []const u8) Error!usize {
const self = @fieldParentPtr(Self, "stream", out_stream);
if (self.byte_found) return;
if (self.byte_found) return bytes.len;
self.byte_found = blk: {
for (bytes) |b|
if (b == self.byte) break :blk true;
break :blk false;
};
return bytes.len;
}
};

View File

@ -796,6 +796,7 @@ pub const NativeTargetInfo = struct {
error.SystemResources => return error.SystemResources,
error.IsDir => return error.UnableToReadElfFile,
error.BrokenPipe => return error.UnableToReadElfFile,
error.Unseekable => return error.UnableToReadElfFile,
error.ConnectionResetByPeer => return error.UnableToReadElfFile,
error.Unexpected => return error.Unexpected,
error.InputOutput => return error.FileSystem,

View File

@ -42,6 +42,7 @@ fn usage(exe: []const u8) !void {
return error.Invalid;
}
// TODO use copy_file_range
fn cat_file(stdout: fs.File, file: fs.File) !void {
var buf: [1024 * 4]u8 = undefined;
@ -55,7 +56,7 @@ fn cat_file(stdout: fs.File, file: fs.File) !void {
break;
}
stdout.write(buf[0..bytes_read]) catch |err| {
stdout.writeAll(buf[0..bytes_read]) catch |err| {
warn("Unable to write to stdout: {}\n", .{@errorName(err)});
return err;
};

View File

@ -1,8 +1,5 @@
const std = @import("std");
pub fn main() !void {
const stdout_file = std.io.getStdOut();
// If this program encounters pipe failure when printing to stdout, exit
// with an error.
try stdout_file.write("Hello, world!\n");
try std.io.getStdOut().writeAll("Hello, World!\n");
}