From 1164d5ece5b12b573c6501c94b9ad9e326199ba9 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 13 Apr 2025 18:57:51 -0700 Subject: [PATCH] tweak std.io.Writer and followups remove std.fs.Dir.readFileAllocOptions, replace with more flexible API readFileIntoArrayList remove std.fs.File.readToEndAllocOptions, replace with more flexible API readIntoArrayList update std.fs.File to new reader/writer API add helper functions to std.io.Reader.Limit replace std.io.Writer.FileLen with std.io.Reader.Limit make offset a type rather than u64 so that it can distinguish between streaming read and positional read avoid an unnecessary allocation in std.zig.readSourceFileToEndAlloc when there is a UTF-16 little endian BOM. --- lib/compiler/std-docs.zig | 36 +++--- lib/std/fs/Dir.zig | 80 +++++++++----- lib/std/fs/File.zig | 202 +++++++++++++++++----------------- lib/std/io/BufferedReader.zig | 4 +- lib/std/io/BufferedWriter.zig | 30 ++--- lib/std/io/Reader.zig | 38 ++++++- lib/std/io/Writer.zig | 46 +++----- lib/std/zig.zig | 33 ++---- 8 files changed, 261 insertions(+), 208 deletions(-) diff --git a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index 6247077527..03b2c3b46c 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -1,13 +1,12 @@ const builtin = @import("builtin"); const std = @import("std"); const mem = std.mem; -const io = std.io; const Allocator = std.mem.Allocator; const assert = std.debug.assert; const Cache = std.Build.Cache; fn usage() noreturn { - io.getStdOut().writeAll( + std.fs.File.stdout().writeAll( \\Usage: zig std [options] \\ \\Options: @@ -63,7 +62,7 @@ pub fn main() !void { var http_server = try address.listen(.{}); const port = http_server.listen_address.in.getPort(); const url_with_newline = try std.fmt.allocPrint(arena, "http://127.0.0.1:{d}/\n", .{port}); - std.io.getStdOut().writeAll(url_with_newline) catch {}; + std.fs.File.stdout().writeAll(url_with_newline) catch {}; if (should_open_browser) { openBrowserTab(gpa, url_with_newline[0 .. url_with_newline.len - 1 :'\n']) catch |err| { std.log.err("unable to open browser: {s}", .{@errorName(err)}); @@ -155,18 +154,29 @@ fn serveDocsFile( name: []const u8, content_type: []const u8, ) !void { - const gpa = context.gpa; - // The desired API is actually sendfile, which will require enhancing std.http.Server. - // We load the file with every request so that the user can make changes to the file - // and refresh the HTML page without restarting this server. - const file_contents = try context.lib_dir.readFileAlloc(gpa, name, 10 * 1024 * 1024); - defer gpa.free(file_contents); - try request.respond(file_contents, .{ - .extra_headers = &.{ - .{ .name = "content-type", .value = content_type }, - cache_control_header, + // Open the file with every request so that the user can make changes to + // the file and refresh the HTML page without restarting this server. 
+    var file = try context.lib_dir.openFile(name, .{});
+    defer file.close();
+    const content_length = std.math.cast(usize, (try file.stat()).size) orelse return error.FileTooBig;
+
+    var send_buffer: [4000]u8 = undefined;
+    var response = request.respondStreaming(.{
+        .send_buffer = &send_buffer,
+        .content_length = content_length,
+        .respond_options = .{
+            .extra_headers = &.{
+                .{ .name = "content-type", .value = content_type },
+                cache_control_header,
+            },
         },
     });
+
+    try response.writer().unbuffered().writeFileAll(file, .{
+        .offset = .zero,
+        .limit = .init(content_length),
+    });
+    try response.end();
 }
 
 fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void {
diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig
index 5d88a2de15..2826f72ef6 100644
--- a/lib/std/fs/Dir.zig
+++ b/lib/std/fs/Dir.zig
@@ -1963,41 +1963,65 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
     return buffer[0..end_index];
 }
 
-/// On success, caller owns returned buffer.
-/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
-/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On WASI, `file_path` should be encoded as valid UTF-8.
-/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
-pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
-    return self.readFileAllocOptions(allocator, file_path, max_bytes, null, .of(u8), null);
+/// Reads all the bytes from the named file. On success, caller owns returned
+/// buffer.
+pub fn readFileAlloc(
+    dir: Dir,
+    /// On Windows, should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+    /// On WASI, should be encoded as valid UTF-8.
+    /// On other platforms, an opaque sequence of bytes with no particular encoding.
+    file_path: []const u8,
+    /// Used to allocate the result.
+    gpa: mem.Allocator,
+    /// If the file exceeds `limit`, `error.FileTooBig` is returned.
+    limit: std.io.Reader.Limit,
+) (File.OpenError || File.ReadAllocError)![]u8 {
+    var buffer: std.ArrayListUnmanaged(u8) = .empty;
+    defer buffer.deinit(gpa);
+    try readFileIntoArrayList(dir, file_path, gpa, limit, null, null, &buffer);
+    return buffer.toOwnedSlice(gpa);
 }
 
-/// On success, caller owns returned buffer.
-/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
-/// If `size_hint` is specified the initial buffer size is calculated using
-/// that value, otherwise the effective file size is used instead.
-/// Allows specifying alignment and a sentinel value.
-/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
-/// On WASI, `file_path` should be encoded as valid UTF-8.
-/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
-pub fn readFileAllocOptions(
-    self: Dir,
-    allocator: mem.Allocator,
+/// Reads all the bytes from the named file, appending them into the provided
+/// array list.
+///
+/// If `limit` is exceeded:
+/// * The array list's length is increased by exactly one byte past `limit`.
+/// * The file seek position is advanced by exactly one byte past `limit`.
+/// * `error.FileTooBig` is returned.
+pub fn readFileIntoArrayList(
+    dir: Dir,
+    /// On Windows, should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+    /// On WASI, should be encoded as valid UTF-8.
+    /// On other platforms, an opaque sequence of bytes with no particular encoding.
     file_path: []const u8,
-    max_bytes: usize,
+    gpa: Allocator,
+    limit: std.io.Reader.Limit,
+    /// If specified, the initial buffer size is calculated using this value,
+    /// otherwise the effective file size is used instead.
     size_hint: ?usize,
-    comptime alignment: std.mem.Alignment,
-    comptime optional_sentinel: ?u8,
-) !(if (optional_sentinel) |s| [:s]align(alignment.toByteUnits()) u8 else []align(alignment.toByteUnits()) u8) {
-    var file = try self.openFile(file_path, .{});
+    comptime alignment: ?std.mem.Alignment,
+    list: *std.ArrayListAlignedUnmanaged(u8, alignment),
+) (File.OpenError || File.ReadAllocError)!void {
+    var file = try dir.openFile(file_path, .{});
     defer file.close();
 
-    // If the file size doesn't fit a usize it'll be certainly greater than
-    // `max_bytes`
-    const stat_size = size_hint orelse std.math.cast(usize, try file.getEndPos()) orelse
-        return error.FileTooBig;
+    // Apply size hint by adjusting the array list's capacity.
+    if (size_hint) |size| {
+        try list.ensureUnusedCapacity(gpa, size);
+    } else if (file.getEndPos()) |size| {
+        // If the file size doesn't fit in a usize, it will certainly exceed the limit.
+        try list.ensureUnusedCapacity(gpa, std.math.cast(usize, size) orelse return error.FileTooBig);
+    } else |err| switch (err) {
+        // Ignore most errors; the size hint is only an optimization.
+        error.Unseekable, error.Unexpected, error.AccessDenied, error.PermissionDenied => {},
+        else => |e| return e,
+    }
 
-    return file.readToEndAllocOptions(allocator, max_bytes, stat_size, alignment, optional_sentinel);
+    try file.readIntoArrayList(gpa, limit, alignment, list);
 }
 
 pub const DeleteTreeError = error{
diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig
index 2d347eb0cd..471bf9c96d 100644
--- a/lib/std/fs/File.zig
+++ b/lib/std/fs/File.zig
@@ -1142,46 +1142,43 @@ pub fn updateTimes(
     try posix.futimens(self.handle, &times);
 }
 
+pub const ReadAllocError = ReadError || Allocator.Error || error{FileTooBig};
+
 /// Reads all the bytes from the current position to the end of the file.
+///
 /// On success, caller owns returned buffer.
-/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
-pub fn readToEndAlloc(self: File, allocator: Allocator, max_bytes: usize) ![]u8 {
-    return self.readToEndAllocOptions(allocator, max_bytes, null, .of(u8), null);
+///
+/// If `limit` is exceeded, returns `error.FileTooBig`.
+pub fn readToEndAlloc(file: File, gpa: Allocator, limit: std.io.Reader.Limit) ReadAllocError![]u8 {
+    var buffer: std.ArrayListUnmanaged(u8) = .empty;
+    defer buffer.deinit(gpa);
+    try buffer.ensureUnusedCapacity(gpa, std.heap.page_size_min);
+    try readIntoArrayList(file, gpa, limit, null, &buffer);
+    return buffer.toOwnedSlice(gpa);
 }
 
-/// Reads all the bytes from the current position to the end of the file.
-/// On success, caller owns returned buffer.
-/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
-/// If `size_hint` is specified the initial buffer size is calculated using
-/// that value, otherwise an arbitrary value is used instead.
-/// Allows specifying alignment and a sentinel value.
-pub fn readToEndAllocOptions(
-    self: File,
-    allocator: Allocator,
-    max_bytes: usize,
-    size_hint: ?usize,
-    comptime alignment: Alignment,
-    comptime optional_sentinel: ?u8,
-) !(if (optional_sentinel) |s| [:s]align(alignment.toByteUnits()) u8 else []align(alignment.toByteUnits()) u8) {
-    // If no size hint is provided fall back to the size=0 code path
-    const size = size_hint orelse 0;
-
-    // The file size returned by stat is used as hint to set the buffer
-    // size. If the reported size is zero, as it happens on Linux for files
-    // in /proc, a small buffer is allocated instead.
-    const initial_cap = @min((if (size > 0) size else 1024), max_bytes) + @intFromBool(optional_sentinel != null);
-    var array_list = try std.ArrayListAligned(u8, alignment).initCapacity(allocator, initial_cap);
-    defer array_list.deinit();
-
-    self.reader().readAllArrayListAligned(alignment, &array_list, max_bytes) catch |err| switch (err) {
-        error.StreamTooLong => return error.FileTooBig,
-        else => |e| return e,
-    };
-
-    if (optional_sentinel) |sentinel| {
-        return try array_list.toOwnedSliceSentinel(sentinel);
-    } else {
-        return try array_list.toOwnedSlice();
+/// Reads all the bytes from the current position to the end of the file,
+/// appending them into the provided array list.
+///
+/// If `limit` is exceeded:
+/// * The array list's length is increased by exactly one byte past `limit`.
+/// * The file seek position is advanced by exactly one byte past `limit`.
+/// * `error.FileTooBig` is returned.
+pub fn readIntoArrayList(
+    file: File,
+    gpa: Allocator,
+    limit: std.io.Reader.Limit,
+    comptime alignment: ?std.mem.Alignment,
+    list: *std.ArrayListAlignedUnmanaged(u8, alignment),
+) ReadAllocError!void {
+    var remaining = limit;
+    while (true) {
+        try list.ensureUnusedCapacity(gpa, 1);
+        const buffer = remaining.slice1(list.unusedCapacitySlice());
+        const n = try read(file, buffer);
+        if (n == 0) return;
+        list.items.len += n;
+        remaining = remaining.subtract(n) orelse return error.FileTooBig;
     }
 }
 
@@ -1584,35 +1581,19 @@ fn writeFileAllSendfile(self: File, in_file: File, args: WriteFileOptions) posix
 
 pub fn reader(file: File) std.io.Reader {
     return .{
         .context = handleToOpaque(file.handle),
-        .vtable = .{
-            .posRead = reader_posRead,
-            .posReadVec = reader_posReadVec,
-            .streamRead = reader_streamRead,
-            .streamReadVec = reader_streamReadVec,
+        .vtable = &.{
+            .read = streamRead,
+            .readv = streamReadVec,
         },
     };
 }
 
-pub fn unseekableReader(file: File) std.io.Reader {
+pub fn positionalReader(file: File) std.io.PositionalReader {
     return .{
         .context = handleToOpaque(file.handle),
-        .vtable = .{
-            .posRead = null,
-            .posReadVec = null,
-            .streamRead = reader_streamRead,
-            .streamReadVec = reader_streamReadVec,
-        },
-    };
-}
-
-pub fn unstreamableReader(file: File) std.io.Reader {
-    return .{
-        .context = handleToOpaque(file.handle),
-        .vtable = .{
-            .posRead = reader_posRead,
-            .posReadVec = reader_posReadVec,
-            .streamRead = null,
-            .streamReadVec = null,
+        .vtable = &.{
+            .read = posRead,
+            .readv = posReadVec,
         },
     };
 }
@@ -1621,8 +1602,8 @@ pub fn writer(file: File) std.io.Writer {
     return .{
         .context = handleToOpaque(file.handle),
         .vtable = &.{
-            .writeSplat = writer_writeSplat,
-            .writeFile = writer_writeFile,
+            .writeSplat = writeSplat,
+            .writeFile = writeFile,
         },
     };
 }
@@ -1631,19 +1612,18 @@
 /// vectors through the underlying write calls as possible.
 const max_buffers_len = 16;
 
-pub fn reader_posRead(
+fn posRead(
     context: ?*anyopaque,
     bw: *std.io.BufferedWriter,
     limit: std.io.Reader.Limit,
     offset: u64,
-) std.io.Reader.Result {
-    const file = opaqueToHandle(context);
-    const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file;
-    return writer.writeFile(bw, file, .init(offset), len, &.{}, 0);
+) anyerror!std.io.Reader.Status {
+    const file = opaqueToFile(context);
+    const n = try bw.writeFile(file, .init(offset), limit, &.{}, 0);
+    return .{ .len = @intCast(n), .end = n == 0 };
 }
 
-pub fn reader_posReadVec(context: *anyopaque, data: []const []u8, offset: u64) anyerror!std.io.Reader.Status {
-    const file = opaqueToHandle(context);
+fn posReadVec(context: ?*anyopaque, data: []const []u8, offset: u64) anyerror!std.io.Reader.Status {
+    const file = opaqueToFile(context);
     const n = try file.preadv(data, offset);
     return .{
         .len = n,
@@ -1651,35 +1631,57 @@ pub fn reader_posReadVec(context: *anyopaque, data: []const []u8, offset: u64) a
     };
 }
 
-pub fn reader_streamRead(
+fn streamRead(
     context: ?*anyopaque,
     bw: *std.io.BufferedWriter,
     limit: std.io.Reader.Limit,
 ) anyerror!std.io.Reader.Status {
-    const file = opaqueToHandle(context);
-    const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file;
-    const n = try writer.writeFile(bw, file, .none, len, &.{}, 0);
+    const file = opaqueToFile(context);
+    const n = try bw.writeFile(file, .none, limit, &.{}, 0);
     return .{
-        .len = n,
+        .len = @intCast(n),
         .end = n == 0,
     };
 }
 
-pub fn reader_streamReadVec(context: ?*anyopaque, data: []const []u8) anyerror!std.io.Reader.Status {
-    const file = opaqueToHandle(context);
-    const n = try file.readv(data);
-    return .{
-        .len = n,
-        .end = n == 0,
-    };
+fn streamReadVec(context: ?*anyopaque, data: []const []u8) anyerror!std.io.Reader.Status {
+    const handle = opaqueToHandle(context);
+
+    if (is_windows) {
+        // Unfortunately, `ReadFileScatter` cannot be used since it requires
+        // page alignment, so we are stuck using only the first slice.
+        // Avoid empty slices to prevent false positive end detections.
+        var i: usize = 0;
+        while (true) : (i += 1) {
+            if (i >= data.len) return .{};
+            if (data[i].len > 0) break;
+        }
+        const n = try windows.ReadFile(handle, data[i], null);
+        return .{ .len = n, .end = n == 0 };
+    }
+
+    var iovecs: [max_buffers_len]std.posix.iovec = undefined;
+    var iovecs_i: usize = 0;
+    for (data) |d| {
+        // Since the OS checks pointer address before length, we must omit
+        // length-zero vectors.
+        if (d.len == 0) continue;
+        iovecs[iovecs_i] = .{ .base = d.ptr, .len = d.len };
+        iovecs_i += 1;
+        if (iovecs_i >= iovecs.len) break;
+    }
+    const send_vecs = iovecs[0..iovecs_i];
+    if (send_vecs.len == 0) return .{}; // Prevent false positive end detection on empty `data`.
+ const n = try posix.readv(handle, send_vecs); + return .{ .len = @intCast(n), .end = n == 0 }; } -pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) anyerror!usize { - const file = opaqueToHandle(context); +fn writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) anyerror!usize { + const handle = opaqueToHandle(context); var splat_buffer: [256]u8 = undefined; if (is_windows) { if (data.len == 1 and splat == 0) return 0; - return windows.WriteFile(file, data[0], null); + return windows.WriteFile(handle, data[0], null); } var iovecs: [max_buffers_len]std.posix.iovec_const = undefined; var len: usize = @min(iovecs.len, data.len); @@ -1688,8 +1690,8 @@ pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat: .len = d.len, }; switch (splat) { - 0 => return std.posix.writev(file, iovecs[0 .. len - 1]), - 1 => return std.posix.writev(file, iovecs[0..len]), + 0 => return std.posix.writev(handle, iovecs[0 .. len - 1]), + 1 => return std.posix.writev(handle, iovecs[0..len]), else => { const pattern = data[data.len - 1]; if (pattern.len == 1) { @@ -1707,38 +1709,38 @@ pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat: iovecs[len] = .{ .base = &splat_buffer, .len = remaining_splat }; len += 1; } - return std.posix.writev(file, iovecs[0..len]); + return std.posix.writev(handle, iovecs[0..len]); } }, } - return std.posix.writev(file, iovecs[0..len]); + return std.posix.writev(handle, iovecs[0..len]); } -pub fn writer_writeFile( +fn writeFile( context: ?*anyopaque, in_file: std.fs.File, in_offset: std.io.Writer.Offset, - in_len: std.io.Writer.FileLen, + in_limit: std.io.Writer.Limit, headers_and_trailers: []const []const u8, headers_len: usize, ) anyerror!usize { const out_fd = opaqueToHandle(context); const in_fd = in_file.handle; - const len_int = switch (in_len) { - .zero => return writer_writeSplat(context, headers_and_trailers, 1), - .entire_file => 0, - else => in_len.int(), + const len_int = switch (in_limit) { + .zero => return writeSplat(context, headers_and_trailers, 1), + .none => 0, + else => in_limit.toInt().?, }; if (native_os == .linux) sf: { // Linux sendfile does not support headers or trailers but it does // support a streaming read from in_file. - if (headers_len > 0) return writer_writeSplat(context, headers_and_trailers[0..headers_len], 1); + if (headers_len > 0) return writeSplat(context, headers_and_trailers[0..headers_len], 1); const max_count = 0x7ffff000; // Avoid EINVAL. const smaller_len = if (len_int == 0) max_count else @min(len_int, max_count); var off: std.os.linux.off_t = undefined; const off_ptr: ?*std.os.linux.off_t = if (in_offset.toInt()) |offset| b: { off = std.math.cast(std.os.linux.off_t, offset) orelse - return writer_writeSplat(context, headers_and_trailers, 1); + return writeSplat(context, headers_and_trailers, 1); break :b &off; } else null; if (true) @panic("TODO"); @@ -1753,7 +1755,7 @@ pub fn writer_writeFile( } else if (n == 0 and len_int == 0) { // The caller wouldn't be able to tell that the file transfer is // done and would incorrectly repeat the same call. 
-            return writer_writeSplat(context, headers_and_trailers, 1);
+            return writeSplat(context, headers_and_trailers, 1);
         }
         return n;
     }
@@ -1770,7 +1772,7 @@
         error.FileDescriptorNotASocket,
         error.NetworkUnreachable,
         error.NetworkSubsystemFailed,
-        => return writeFileUnseekable(out_fd, in_fd, in_offset, in_len, headers_and_trailers, headers_len),
+        => return writeFileUnseekable(out_fd, in_fd, in_offset, in_limit, headers_and_trailers, headers_len),
         else => |e| return e,
     };
 
@@ -1780,14 +1782,14 @@
 fn writeFileUnseekable(
     out_fd: Handle,
     in_fd: Handle,
-    in_offset: u64,
-    in_len: std.io.Writer.FileLen,
+    in_offset: std.io.Writer.Offset,
+    in_limit: std.io.Writer.Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
     _ = out_fd;
     _ = in_fd;
     _ = in_offset;
-    _ = in_len;
+    _ = in_limit;
     _ = headers_and_trailers;
     _ = headers_len;
     @panic("TODO writeFileUnseekable");
@@ -1809,6 +1811,10 @@ fn opaqueToHandle(userdata: ?*anyopaque) Handle {
     };
 }
 
+fn opaqueToFile(userdata: ?*anyopaque) File {
+    return .{ .handle = opaqueToHandle(userdata) };
+}
+
 pub const SeekableStream = io.SeekableStream(
     File,
     SeekError,
diff --git a/lib/std/io/BufferedReader.zig b/lib/std/io/BufferedReader.zig
index 2a5f1059b1..b2df42b454 100644
--- a/lib/std/io/BufferedReader.zig
+++ b/lib/std/io/BufferedReader.zig
@@ -43,14 +43,14 @@ fn eof_writeFile(
     context: ?*anyopaque,
     file: std.fs.File,
     offset: std.io.Writer.Offset,
-    len: std.io.Writer.FileLen,
+    limit: std.io.Writer.Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
     _ = context;
     _ = file;
     _ = offset;
-    _ = len;
+    _ = limit;
     _ = headers_and_trailers;
     _ = headers_len;
     return error.NoSpaceLeft;
diff --git a/lib/std/io/BufferedWriter.zig b/lib/std/io/BufferedWriter.zig
index 1bc38da66f..e5c898b596 100644
--- a/lib/std/io/BufferedWriter.zig
+++ b/lib/std/io/BufferedWriter.zig
@@ -410,19 +410,19 @@ pub fn writeStructEndian(bw: *BufferedWriter, value: anytype, endian: std.builti
 pub fn writeFile(
     bw: *BufferedWriter,
     file: std.fs.File,
-    offset: u64,
-    len: Writer.FileLen,
+    offset: Writer.Offset,
+    limit: Writer.Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
-    return passthru_writeFile(bw, file, offset, len, headers_and_trailers, headers_len);
+    return passthru_writeFile(bw, file, offset, limit, headers_and_trailers, headers_len);
 }
 
 fn passthru_writeFile(
     context: ?*anyopaque,
     file: std.fs.File,
-    offset: u64,
-    len: Writer.FileLen,
+    offset: Writer.Offset,
+    limit: Writer.Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
@@ -430,7 +430,7 @@ fn passthru_writeFile(
     const buffer = bw.buffer;
     if (buffer.len == 0) return track(
         &bw.count,
-        try bw.unbuffered_writer.writeFile(file, offset, len, headers_and_trailers, headers_len),
+        try bw.unbuffered_writer.writeFile(file, offset, limit, headers_and_trailers, headers_len),
     );
     const start_end = bw.end;
     const headers = headers_and_trailers[0..headers_len];
@@ -457,7 +457,7 @@ fn passthru_writeFile(
     @memcpy(remaining_buffers_for_trailers[0..send_trailers_len], trailers[0..send_trailers_len]);
     const send_headers_len = 1 + buffers_len;
     const send_buffers = buffers[0 .. send_headers_len + send_trailers_len];
-    const n = try bw.unbuffered_writer.writeFile(file, offset, len, send_buffers, send_headers_len);
+    const n = try bw.unbuffered_writer.writeFile(file, offset, limit, send_buffers, send_headers_len);
     if (n < end) {
         @branchHint(.unlikely);
         const remainder = buffer[n..end];
@@ -487,7 +487,7 @@
         @memcpy(remaining_buffers[0..send_trailers_len], trailers[0..send_trailers_len]);
         const send_headers_len = 1;
         const send_buffers = buffers[0 .. send_headers_len + send_trailers_len];
-        const n = try bw.unbuffered_writer.writeFile(file, offset, len, send_buffers, send_headers_len);
+        const n = try bw.unbuffered_writer.writeFile(file, offset, limit, send_buffers, send_headers_len);
         if (n < end) {
             @branchHint(.unlikely);
             const remainder = buffer[n..end];
@@ -500,26 +500,26 @@
 }
 
 pub const WriteFileOptions = struct {
-    offset: u64 = 0,
+    offset: Writer.Offset = .none,
     /// If the size of the source file is known, it is likely that passing the
     /// size here will save one syscall.
-    len: Writer.FileLen = .entire_file,
-    /// Headers and trailers must be passed together so that in case `len` is
-    /// zero, they can be forwarded directly to `Writer.VTable.writev`.
+    limit: Writer.Limit = .none,
+    /// Headers and trailers must be passed together so that in case `limit`
+    /// is zero, they can be forwarded directly to `Writer.VTable.writev`.
     ///
     /// The parameter is mutable because this function needs to mutate the
     /// fields in order to handle partial writes from `Writer.VTable.writeFile`.
     headers_and_trailers: [][]const u8 = &.{},
-    /// The number of trailers is inferred from `headers_and_trailers.len -
-    /// headers_len`.
+    /// The number of trailers is inferred from
+    /// `headers_and_trailers.len - headers_len`.
    headers_len: usize = 0,
 };
 
 pub fn writeFileAll(bw: *BufferedWriter, file: std.fs.File, options: WriteFileOptions) anyerror!void {
     const headers_and_trailers = options.headers_and_trailers;
     const headers = headers_and_trailers[0..options.headers_len];
-    if (options.len == .zero) return writevAll(bw, headers_and_trailers);
-    if (options.len == .entire_file) {
+    if (options.limit == .zero) return writevAll(bw, headers_and_trailers);
+    if (options.limit == .none) {
         // When reading the whole file, we cannot include the trailers in the
         // call that reads from the file handle, because we have no way to
         // determine whether a partial write is past the end of the file or
@@ -540,7 +540,7 @@ pub fn writeFileAll(bw: *BufferedWriter, file: std.fs.File, options: WriteFileOp
             offset += n;
         }
     } else {
-        var len = options.len.int();
+        var len = options.limit.toInt().?;
         var i: usize = 0;
         var offset = options.offset;
         while (true) {
diff --git a/lib/std/io/Reader.zig b/lib/std/io/Reader.zig
index 76d7ab5c51..c562f5b8d5 100644
--- a/lib/std/io/Reader.zig
+++ b/lib/std/io/Reader.zig
@@ -48,11 +48,45 @@ pub const Status = packed struct(usize) {
 };
 
 pub const Limit = enum(usize) {
+    zero = 0,
     none = std.math.maxInt(usize),
     _,
 
-    pub fn min(l: Limit, int: usize) usize {
-        return @min(int, @intFromEnum(l));
+    /// `std.math.maxInt(usize)` is interpreted to mean "no limit".
+    pub fn init(n: usize) Limit {
+        return @enumFromInt(n);
+    }
+
+    pub fn min(l: Limit, n: usize) usize {
+        return @min(n, @intFromEnum(l));
+    }
+
+    pub fn slice(l: Limit, s: []u8) []u8 {
+        return s[0..min(l, s.len)];
+    }
+
+    pub fn toInt(l: Limit) ?usize {
+        return if (l == .none) null else @intFromEnum(l);
+    }
+
+    /// Reduces a slice to account for the limit, leaving room for one extra
+    /// byte above the limit, allowing for the use case of differentiating
+    /// between end-of-stream and reaching the limit.
+    pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 {
+        assert(non_empty_buffer.len >= 1);
+        return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)];
+    }
+
+    pub fn nonzero(l: Limit) bool {
+        return @intFromEnum(l) > 0;
+    }
+
+    /// Returns a new limit reduced by `amount`, or `null` if the limit
+    /// would be exceeded.
+    pub fn subtract(l: Limit, amount: usize) ?Limit {
+        if (l == .none) return .none;
+        if (amount > @intFromEnum(l)) return null;
+        return @enumFromInt(@intFromEnum(l) - amount);
     }
 };
diff --git a/lib/std/io/Writer.zig b/lib/std/io/Writer.zig
index 35847a0c20..6d4f3fe171 100644
--- a/lib/std/io/Writer.zig
+++ b/lib/std/io/Writer.zig
@@ -31,9 +31,11 @@ pub const VTable = struct {
     writeFile: *const fn (
         ctx: ?*anyopaque,
         file: std.fs.File,
+        /// If this is `none`, `file` will be streamed. Otherwise, it will be
+        /// read positionally without affecting the seek position.
         offset: Offset,
-        /// When zero, it means copy until the end of the file is reached.
-        len: FileLen,
+        /// Maximum number of bytes to read from the file.
+        limit: Limit,
-        /// Headers and trailers must be passed together so that in case `len` is
-        /// zero, they can be forwarded directly to `VTable.writev`.
+        /// Headers and trailers must be passed together so that in case `limit`
+        /// is zero, they can be forwarded directly to `VTable.writev`.
         headers_and_trailers: []const []const u8,
         headers_len: usize,
     ) anyerror!usize,
 };
 
+pub const Limit = std.io.Reader.Limit;
+
 pub const Offset = enum(u64) {
+    zero = 0,
     /// Indicates to read the file as a stream.
     none = std.math.maxInt(u64),
     _,
@@ -53,24 +58,7 @@
     }
 
     pub fn toInt(o: Offset) ?u64 {
-        if (o == .none) return null;
-        return @intFromEnum(o);
-    }
-};
-
-pub const FileLen = enum(u64) {
-    zero = 0,
-    entire_file = std.math.maxInt(u64),
-    _,
-
-    pub fn init(integer: u64) FileLen {
-        const result: FileLen = @enumFromInt(integer);
-        assert(result != .entire_file);
-        return result;
-    }
-
-    pub fn int(len: FileLen) u64 {
-        return @intFromEnum(len);
+        return if (o == .none) null else @intFromEnum(o);
     }
 };
 
@@ -85,26 +73,26 @@ pub fn writeSplat(w: Writer, data: []const []const u8, splat: usize) anyerror!us
 pub fn writeFile(
     w: Writer,
     file: std.fs.File,
-    offset: u64,
-    len: FileLen,
+    offset: Offset,
+    limit: Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
-    return w.vtable.writeFile(w.context, file, offset, len, headers_and_trailers, headers_len);
+    return w.vtable.writeFile(w.context, file, offset, limit, headers_and_trailers, headers_len);
 }
 
 pub fn unimplemented_writeFile(
     context: ?*anyopaque,
     file: std.fs.File,
     offset: Offset,
-    len: FileLen,
+    limit: Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
     _ = context;
     _ = file;
     _ = offset;
-    _ = len;
+    _ = limit;
     _ = headers_and_trailers;
     _ = headers_len;
     return error.Unimplemented;
@@ -143,7 +131,7 @@ fn null_writeFile(
     context: ?*anyopaque,
     file: std.fs.File,
     offset: Offset,
-    len: FileLen,
+    limit: Limit,
     headers_and_trailers: []const []const u8,
     headers_len: usize,
 ) anyerror!usize {
@@ -152,7 +140,7 @@
     if (offset == .none) {
         @panic("TODO seek the file forwards");
     }
-    if (len == .entire_file) {
+    const limit_int = limit.toInt() orelse {
         const headers = headers_and_trailers[0..headers_len];
         for (headers) |bytes| n += bytes.len;
         if (offset.toInt()) |off| {
@@ -162,9 +150,9 @@
             return n;
         }
         @panic("TODO stream from file until eof, counting");
-    }
+    };
     for (headers_and_trailers) |bytes| n += bytes.len;
-    return len.int() + n;
+    return limit_int + n;
 }
 
 test @"null" {
diff --git a/lib/std/zig.zig b/lib/std/zig.zig
index 4d6ba2b56a..4401b5fc80 100644
--- a/lib/std/zig.zig
+++ b/lib/std/zig.zig
@@ -543,20 +543,18 @@ test isUnderscore {
     try std.testing.expect(!isUnderscore("\\x5f"));
 }
 
-pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?usize) ![:0]u8 {
-    const source_code = input.readToEndAllocOptions(
-        gpa,
-        max_src_size,
-        size_hint,
-        .of(u8),
-        0,
-    ) catch |err| switch (err) {
+pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: usize) ![:0]u8 {
+    var buffer: std.ArrayListAlignedUnmanaged(u8, .@"2") = .empty;
+    defer buffer.deinit(gpa);
+
+    try buffer.ensureUnusedCapacity(gpa, size_hint);
+
+    input.readIntoArrayList(gpa, .init(max_src_size), .@"2", &buffer) catch |err| switch (err) {
         error.ConnectionResetByPeer => unreachable,
         error.ConnectionTimedOut => unreachable,
         error.NotOpenForReading => unreachable,
         else => |e| return e,
     };
-    errdefer gpa.free(source_code);
 
     // Detect unsupported file types with their Byte Order Mark
     const unsupported_boms = [_][]const u8{
@@ -565,30 +563,23 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?
         "\xfe\xff", // UTF-16 big endian
     };
     for (unsupported_boms) |bom| {
-        if (std.mem.startsWith(u8, source_code, bom)) {
+        if (std.mem.startsWith(u8, buffer.items, bom)) {
             return error.UnsupportedEncoding;
         }
     }
 
     // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
-    if (std.mem.startsWith(u8, source_code, "\xff\xfe")) {
-        if (source_code.len % 2 != 0) return error.InvalidEncoding;
-        // TODO: after wrangle-writer-buffering branch is merged,
-        // avoid this unnecessary allocation
-        const aligned_copy = try gpa.alloc(u16, source_code.len / 2);
-        defer gpa.free(aligned_copy);
-        @memcpy(std.mem.sliceAsBytes(aligned_copy), source_code);
-        const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(gpa, aligned_copy) catch |err| switch (err) {
+    if (std.mem.startsWith(u8, buffer.items, "\xff\xfe")) {
+        if (buffer.items.len % 2 != 0) return error.InvalidEncoding;
+        return std.unicode.utf16LeToUtf8AllocZ(gpa, std.mem.bytesAsSlice(u16, buffer.items)) catch |err| switch (err) {
             error.DanglingSurrogateHalf => error.UnsupportedEncoding,
             error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
             error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
             else => |e| return e,
         };
-        gpa.free(source_code);
-        return source_code_utf8;
     }
 
-    return source_code;
+    return buffer.toOwnedSliceSentinel(gpa, 0);
 }
 
 pub fn printAstErrorsToStderr(gpa: Allocator, tree: Ast, path: []const u8, color: Color) !void {
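
Usage sketch (editor's illustration, not part of the patch): the snippet below exercises the reworked APIs as they appear in this diff — `Dir.readFileAlloc` taking a `std.io.Reader.Limit`, `File.readToEndAlloc` with `.none` meaning "no limit", and `Limit.subtract` for the bookkeeping loop that `File.readIntoArrayList` uses. The allocator choice and the file path are hypothetical; treat this as a sketch against the post-patch std, not a verified program.

    const std = @import("std");

    pub fn main() !void {
        const gpa = std.heap.page_allocator;

        // Cap the read at 16 MiB; reading one byte past the cap fails with
        // error.FileTooBig, per the readFileAlloc doc comment above.
        const capped = try std.fs.cwd().readFileAlloc("example.txt", gpa, .init(16 * 1024 * 1024));
        defer gpa.free(capped);

        // `.none` places no limit on how much is read.
        var file = try std.fs.cwd().openFile("example.txt", .{});
        defer file.close();
        const everything = try file.readToEndAlloc(gpa, .none);
        defer gpa.free(everything);

        // Limit bookkeeping as File.readIntoArrayList does it: subtract
        // returns null once the limit would be exceeded.
        var remaining: std.io.Reader.Limit = .init(4);
        remaining = remaining.subtract(3).?; // 1 byte of budget left
        std.debug.assert(remaining.subtract(2) == null); // would exceed the budget
    }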