tweak std.io.Writer and followups

remove std.fs.Dir.readFileAllocOptions; replace with the more flexible API
readFileIntoArrayList

remove std.fs.File.readToEndAllocOptions; replace with the more flexible API
readIntoArrayList

update std.fs.File to new reader/writer API

add helper functions to std.io.Reader.Limit

replace std.io.Writer.FileLen with std.io.Reader.Limit

make offset a type rather than u64 so that it can distinguish between
streaming read and positional read

avoid an unnecessary allocation in std.zig.readSourceFileToEndAlloc when
there is a UTF-16 little endian BOM.
Author: Andrew Kelley
Date: 2025-04-13 18:57:51 -07:00
Parent: 383afd19d7
Commit: 1164d5ece5
8 changed files with 261 additions and 208 deletions
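For a sense of the caller-side change, here is a minimal before/after sketch against the new Dir API in this diff (the file name and limit are invented for illustration; `gpa` is assumed in scope):

    // Before: one function carried max_bytes, size hint, alignment, and sentinel.
    // const data = try dir.readFileAllocOptions(gpa, "notes.txt", max_bytes, null, .of(u8), null);
    // After: a plain allocating read with an explicit limit...
    const data = try dir.readFileAlloc("notes.txt", gpa, .init(10 * 1024 * 1024));
    defer gpa.free(data);
    // ...or an appending read into a caller-owned list, with optional
    // size hint and alignment (none here).
    var list: std.ArrayListUnmanaged(u8) = .empty;
    defer list.deinit(gpa);
    try dir.readFileIntoArrayList("notes.txt", gpa, .none, null, null, &list);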


@@ -1,13 +1,12 @@
const builtin = @import("builtin");
const std = @import("std");
const mem = std.mem;
const io = std.io;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const Cache = std.Build.Cache;
fn usage() noreturn {
io.getStdOut().writeAll(
std.fs.File.stdout().writeAll(
\\Usage: zig std [options]
\\
\\Options:
@@ -63,7 +62,7 @@ pub fn main() !void {
var http_server = try address.listen(.{});
const port = http_server.listen_address.in.getPort();
const url_with_newline = try std.fmt.allocPrint(arena, "http://127.0.0.1:{d}/\n", .{port});
std.io.getStdOut().writeAll(url_with_newline) catch {};
std.fs.File.stdout().writeAll(url_with_newline) catch {};
if (should_open_browser) {
openBrowserTab(gpa, url_with_newline[0 .. url_with_newline.len - 1 :'\n']) catch |err| {
std.log.err("unable to open browser: {s}", .{@errorName(err)});
@@ -155,18 +154,29 @@ fn serveDocsFile(
name: []const u8,
content_type: []const u8,
) !void {
const gpa = context.gpa;
// The desired API is actually sendfile, which will require enhancing std.http.Server.
// We load the file with every request so that the user can make changes to the file
// and refresh the HTML page without restarting this server.
const file_contents = try context.lib_dir.readFileAlloc(gpa, name, 10 * 1024 * 1024);
defer gpa.free(file_contents);
try request.respond(file_contents, .{
.extra_headers = &.{
.{ .name = "content-type", .value = content_type },
cache_control_header,
// Open the file with every request so that the user can make changes to
// the file and refresh the HTML page without restarting this server.
var file = try context.lib_dir.openFile(name, .{});
defer file.close();
const content_length = std.math.cast(usize, (try file.stat()).size) orelse return error.FileTooBig;
var send_buffer: [4000]u8 = undefined;
var response = request.respondStreaming(.{
.send_buffer = &send_buffer,
.content_length = content_length,
.respond_options = .{
.extra_headers = &.{
.{ .name = "content-type", .value = content_type },
cache_control_header,
},
},
});
try response.writer().unbuffered().writeFileAll(file, .{
.offset = .zero,
.limit = .init(content_length),
});
try response.end();
}
fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void {


@@ -1963,41 +1963,65 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
return buffer[0..end_index];
}
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
return self.readFileAllocOptions(allocator, file_path, max_bytes, null, .of(u8), null);
/// Reads all the bytes from the named file. On success, caller owns returned
/// buffer.
pub fn readFileAlloc(
dir: Dir,
/// On Windows, should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, should be encoded as valid UTF-8.
/// On other platforms, an opaque sequence of bytes with no particular encoding.
file_path: []const u8,
/// Used to allocate the result.
gpa: mem.Allocator,
/// If exceeded:
/// * The array list's length is increased by exactly one byte past `limit`.
/// * The file seek position is advanced by exactly one byte past `limit`.
/// * `error.FileTooBig` is returned.
limit: std.io.Reader.Limit,
) (File.OpenError || File.ReadAllocError)![]u8 {
var buffer: std.ArrayListUnmanaged(u8) = .empty;
defer buffer.deinit(gpa);
try readFileIntoArrayList(dir, file_path, gpa, limit, null, null, &buffer);
return buffer.toOwnedSlice(gpa);
}
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
/// If `size_hint` is specified the initial buffer size is calculated using
/// that value, otherwise the effective file size is used instead.
/// Allows specifying alignment and a sentinel value.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFileAllocOptions(
self: Dir,
allocator: mem.Allocator,
/// Reads all the bytes from the named file, appending them into the provided
/// array list.
///
/// If `limit` is exceeded:
/// * The array list's length is increased by exactly one byte past `limit`.
/// * The file seek position is advanced by exactly one byte past `limit`.
/// * `error.FileTooBig` is returned.
pub fn readFileIntoArrayList(
dir: Dir,
/// On Windows, should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, should be encoded as valid UTF-8.
/// On other platforms, an opaque sequence of bytes with no particular encoding.
file_path: []const u8,
max_bytes: usize,
gpa: Allocator,
limit: std.io.Reader.Limit,
/// If specified, the initial buffer size is calculated using this value,
/// otherwise the effective file size is used instead.
size_hint: ?usize,
comptime alignment: std.mem.Alignment,
comptime optional_sentinel: ?u8,
) !(if (optional_sentinel) |s| [:s]align(alignment.toByteUnits()) u8 else []align(alignment.toByteUnits()) u8) {
var file = try self.openFile(file_path, .{});
comptime alignment: ?std.mem.Alignment,
list: *std.ArrayListAlignedUnmanaged(u8, alignment),
) (File.OpenError || File.ReadAllocError)!void {
var file = try dir.openFile(file_path, .{});
defer file.close();
// If the file size doesn't fit a usize it'll be certainly greater than
// `max_bytes`
const stat_size = size_hint orelse std.math.cast(usize, try file.getEndPos()) orelse
return error.FileTooBig;
// Apply size hint by adjusting the array list's capacity.
if (size_hint) |size| {
try list.ensureUnusedCapacity(gpa, size);
} else if (file.getEndPos()) |size| {
// If the file size doesn't fit in a usize, it will certainly exceed the limit.
try list.ensureUnusedCapacity(gpa, std.math.cast(usize, size) orelse return error.FileTooBig);
} else |err| switch (err) {
// Ignore most errors; size hint is only an optimization.
error.Unseekable, error.Unexpected, error.AccessDenied, error.PermissionDenied => {},
else => |e| return e,
}
return file.readToEndAllocOptions(allocator, max_bytes, stat_size, alignment, optional_sentinel);
try file.readIntoArrayList(gpa, limit, alignment, list);
}
pub const DeleteTreeError = error{


@@ -1142,46 +1142,43 @@ pub fn updateTimes(
try posix.futimens(self.handle, &times);
}
pub const ReadAllocError = ReadError || Allocator.Error || error{FileTooBig};
/// Reads all the bytes from the current position to the end of the file.
///
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
pub fn readToEndAlloc(self: File, allocator: Allocator, max_bytes: usize) ![]u8 {
return self.readToEndAllocOptions(allocator, max_bytes, null, .of(u8), null);
///
/// If `limit` is exceeded, returns `error.FileTooBig`.
pub fn readToEndAlloc(file: File, gpa: Allocator, limit: std.io.Reader.Limit) ReadAllocError![]u8 {
var buffer: std.ArrayListUnmanaged(u8) = .empty;
defer buffer.deinit(gpa);
try buffer.ensureUnusedCapacity(gpa, std.heap.page_size_min);
try readIntoArrayList(file, gpa, limit, null, &buffer);
return buffer.toOwnedSlice(gpa);
}
/// Reads all the bytes from the current position to the end of the file.
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
/// If `size_hint` is specified the initial buffer size is calculated using
/// that value, otherwise an arbitrary value is used instead.
/// Allows specifying alignment and a sentinel value.
pub fn readToEndAllocOptions(
self: File,
allocator: Allocator,
max_bytes: usize,
size_hint: ?usize,
comptime alignment: Alignment,
comptime optional_sentinel: ?u8,
) !(if (optional_sentinel) |s| [:s]align(alignment.toByteUnits()) u8 else []align(alignment.toByteUnits()) u8) {
// If no size hint is provided fall back to the size=0 code path
const size = size_hint orelse 0;
// The file size returned by stat is used as hint to set the buffer
// size. If the reported size is zero, as it happens on Linux for files
// in /proc, a small buffer is allocated instead.
const initial_cap = @min((if (size > 0) size else 1024), max_bytes) + @intFromBool(optional_sentinel != null);
var array_list = try std.ArrayListAligned(u8, alignment).initCapacity(allocator, initial_cap);
defer array_list.deinit();
self.reader().readAllArrayListAligned(alignment, &array_list, max_bytes) catch |err| switch (err) {
error.StreamTooLong => return error.FileTooBig,
else => |e| return e,
};
if (optional_sentinel) |sentinel| {
return try array_list.toOwnedSliceSentinel(sentinel);
} else {
return try array_list.toOwnedSlice();
/// Reads all the bytes from the current position to the end of the file,
/// appending them into the provided array list.
///
/// If `limit` is exceeded:
/// * The array list's length is increased by exactly one byte past `limit`.
/// * The file seek position is advanced by exactly one byte past `limit`.
/// * `error.FileTooBig` is returned.
pub fn readIntoArrayList(
file: File,
gpa: Allocator,
limit: std.io.Reader.Limit,
comptime alignment: ?std.mem.Alignment,
list: *std.ArrayListAlignedUnmanaged(u8, alignment),
) ReadAllocError!void {
var remaining = limit;
while (true) {
try list.ensureUnusedCapacity(gpa, 1);
const buffer = remaining.slice1(list.unusedCapacitySlice());
const n = try read(file, buffer);
if (n == 0) return;
list.items.len += n;
remaining = remaining.subtract(n) orelse return error.FileTooBig;
}
}
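The `slice1` call above always leaves one byte of headroom past the limit; that is how end-of-stream is told apart from `error.FileTooBig`. A sketch of the observable contract (`gpa` and `file` assumed in scope, with `file` holding at least 9 bytes):

    var list: std.ArrayListUnmanaged(u8) = .empty;
    defer list.deinit(gpa);
    // With a limit of 8, exactly 9 bytes are appended (one past the limit)
    // and the seek position advances by 9 before the error is returned.
    try std.testing.expectError(error.FileTooBig, file.readIntoArrayList(gpa, .init(8), null, &list));
    try std.testing.expectEqual(@as(usize, 9), list.items.len);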
@@ -1584,35 +1581,19 @@ fn writeFileAllSendfile(self: File, in_file: File, args: WriteFileOptions) posix
pub fn reader(file: File) std.io.Reader {
return .{
.context = handleToOpaque(file.handle),
.vtable = .{
.posRead = reader_posRead,
.posReadVec = reader_posReadVec,
.streamRead = reader_streamRead,
.streamReadVec = reader_streamReadVec,
.vtable = &.{
.read = streamRead,
.readv = streamReadVec,
},
};
}
pub fn unseekableReader(file: File) std.io.Reader {
pub fn positionalReader(file: File) std.io.PositionalReader {
return .{
.context = handleToOpaque(file.handle),
.vtable = .{
.posRead = null,
.posReadVec = null,
.streamRead = reader_streamRead,
.streamReadVec = reader_streamReadVec,
},
};
}
pub fn unstreamableReader(file: File) std.io.Reader {
return .{
.context = handleToOpaque(file.handle),
.vtable = .{
.posRead = reader_posRead,
.posReadVec = reader_posReadVec,
.streamRead = null,
.streamReadVec = null,
.vtable = &.{
.read = posRead,
.readv = posReadVec,
},
};
}
@@ -1621,8 +1602,8 @@ pub fn writer(file: File) std.io.Writer {
return .{
.context = handleToOpaque(file.handle),
.vtable = &.{
.writeSplat = writer_writeSplat,
.writeFile = writer_writeFile,
.writeSplat = writeSplat,
.writeFile = writeFile,
},
};
}
@@ -1631,19 +1612,18 @@ pub fn writer(file: File) std.io.Writer {
/// vectors through the underlying write calls as possible.
const max_buffers_len = 16;
pub fn reader_posRead(
fn posRead(
context: ?*anyopaque,
bw: *std.io.BufferedWriter,
limit: std.io.Reader.Limit,
offset: u64,
) std.io.Reader.Result {
const file = opaqueToHandle(context);
const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file;
return writer.writeFile(bw, file, .init(offset), len, &.{}, 0);
const file = opaqueToFile(context);
return bw.writeFile(file, .init(offset), limit, &.{}, 0);
}
pub fn reader_posReadVec(context: *anyopaque, data: []const []u8, offset: u64) anyerror!std.io.Reader.Status {
const file = opaqueToHandle(context);
fn posReadVec(context: *anyopaque, data: []const []u8, offset: u64) anyerror!std.io.Reader.Status {
const file = opaqueToFile(context);
const n = try file.preadv(data, offset);
return .{
.len = n,
@@ -1651,35 +1631,57 @@ pub fn reader_posReadVec(context: *anyopaque, data: []const []u8, offset: u64) a
};
}
pub fn reader_streamRead(
fn streamRead(
context: ?*anyopaque,
bw: *std.io.BufferedWriter,
limit: std.io.Reader.Limit,
) anyerror!std.io.Reader.Status {
const file = opaqueToHandle(context);
const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file;
const n = try writer.writeFile(bw, file, .none, len, &.{}, 0);
const file = opaqueToFile(context);
const n = try bw.writeFile(file, .none, limit, &.{}, 0);
return .{
.len = n,
.len = @intCast(n),
.end = n == 0,
};
}
pub fn reader_streamReadVec(context: ?*anyopaque, data: []const []u8) anyerror!std.io.Reader.Status {
const file = opaqueToHandle(context);
const n = try file.readv(data);
return .{
.len = n,
.end = n == 0,
};
fn streamReadVec(context: ?*anyopaque, data: []const []u8) anyerror!std.io.Reader.Status {
const handle = opaqueToHandle(context);
if (is_windows) {
// Unfortunately, `ReadFileScatter` cannot be used since it requires
// page alignment, so we are stuck using only the first slice.
// Avoid empty slices to prevent false positive end detections.
var i: usize = 0;
while (true) : (i += 1) {
if (i >= data.len) return .{};
if (data[i].len > 0) break;
}
const n = try windows.ReadFile(handle, data[i], null);
return .{ .len = n, .end = n == 0 };
}
var iovecs: [max_buffers_len]std.posix.iovec = undefined;
var iovecs_i: usize = 0;
for (data) |d| {
// Since the OS checks pointer address before length, we must omit
// length-zero vectors.
if (d.len == 0) continue;
iovecs[iovecs_i] = .{ .base = d.ptr, .len = d.len };
iovecs_i += 1;
if (iovecs_i >= iovecs.len) break;
}
const send_vecs = iovecs[0..iovecs_i];
if (send_vecs.len == 0) return .{}; // Prevent false positive end detection on empty `data`.
const n = try posix.readv(handle, send_vecs);
return .{ .len = @intCast(n), .end = n == 0 };
}
pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) anyerror!usize {
const file = opaqueToHandle(context);
fn writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) anyerror!usize {
const handle = opaqueToHandle(context);
var splat_buffer: [256]u8 = undefined;
if (is_windows) {
if (data.len == 1 and splat == 0) return 0;
return windows.WriteFile(file, data[0], null);
return windows.WriteFile(handle, data[0], null);
}
var iovecs: [max_buffers_len]std.posix.iovec_const = undefined;
var len: usize = @min(iovecs.len, data.len);
@@ -1688,8 +1690,8 @@ pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat:
.len = d.len,
};
switch (splat) {
0 => return std.posix.writev(file, iovecs[0 .. len - 1]),
1 => return std.posix.writev(file, iovecs[0..len]),
0 => return std.posix.writev(handle, iovecs[0 .. len - 1]),
1 => return std.posix.writev(handle, iovecs[0..len]),
else => {
const pattern = data[data.len - 1];
if (pattern.len == 1) {
@@ -1707,38 +1709,38 @@ pub fn writer_writeSplat(context: ?*anyopaque, data: []const []const u8, splat:
iovecs[len] = .{ .base = &splat_buffer, .len = remaining_splat };
len += 1;
}
return std.posix.writev(file, iovecs[0..len]);
return std.posix.writev(handle, iovecs[0..len]);
}
},
}
return std.posix.writev(file, iovecs[0..len]);
return std.posix.writev(handle, iovecs[0..len]);
}
pub fn writer_writeFile(
fn writeFile(
context: ?*anyopaque,
in_file: std.fs.File,
in_offset: std.io.Writer.Offset,
in_len: std.io.Writer.FileLen,
in_limit: std.io.Writer.Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
const out_fd = opaqueToHandle(context);
const in_fd = in_file.handle;
const len_int = switch (in_len) {
.zero => return writer_writeSplat(context, headers_and_trailers, 1),
.entire_file => 0,
else => in_len.int(),
const len_int = switch (in_limit) {
.zero => return writeSplat(context, headers_and_trailers, 1),
.none => 0,
else => in_limit.toInt().?,
};
if (native_os == .linux) sf: {
// Linux sendfile does not support headers or trailers but it does
// support a streaming read from in_file.
if (headers_len > 0) return writer_writeSplat(context, headers_and_trailers[0..headers_len], 1);
if (headers_len > 0) return writeSplat(context, headers_and_trailers[0..headers_len], 1);
const max_count = 0x7ffff000; // Avoid EINVAL.
const smaller_len = if (len_int == 0) max_count else @min(len_int, max_count);
var off: std.os.linux.off_t = undefined;
const off_ptr: ?*std.os.linux.off_t = if (in_offset.toInt()) |offset| b: {
off = std.math.cast(std.os.linux.off_t, offset) orelse
return writer_writeSplat(context, headers_and_trailers, 1);
return writeSplat(context, headers_and_trailers, 1);
break :b &off;
} else null;
if (true) @panic("TODO");
@@ -1753,7 +1755,7 @@ pub fn writer_writeFile(
} else if (n == 0 and len_int == 0) {
// The caller wouldn't be able to tell that the file transfer is
// done and would incorrectly repeat the same call.
return writer_writeSplat(context, headers_and_trailers, 1);
return writeSplat(context, headers_and_trailers, 1);
}
return n;
}
@@ -1770,7 +1772,7 @@ pub fn writer_writeFile(
error.FileDescriptorNotASocket,
error.NetworkUnreachable,
error.NetworkSubsystemFailed,
=> return writeFileUnseekable(out_fd, in_fd, in_offset, in_len, headers_and_trailers, headers_len),
=> return writeFileUnseekable(out_fd, in_fd, in_offset, in_limit, headers_and_trailers, headers_len),
else => |e| return e,
};
@@ -1780,14 +1782,14 @@ fn writeFileUnseekable(
out_fd: Handle,
in_fd: Handle,
in_offset: u64,
in_len: std.io.Writer.FileLen,
in_limit: std.io.Writer.Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
_ = out_fd;
_ = in_fd;
_ = in_offset;
_ = in_len;
_ = in_limit;
_ = headers_and_trailers;
_ = headers_len;
@panic("TODO writeFileUnseekable");
@@ -1809,6 +1811,10 @@ fn opaqueToHandle(userdata: ?*anyopaque) Handle {
};
}
fn opaqueToFile(userdata: ?*anyopaque) File {
return .{ .handle = opaqueToHandle(userdata) };
}
pub const SeekableStream = io.SeekableStream(
File,
SeekError,


@@ -43,14 +43,14 @@ fn eof_writeFile(
context: ?*anyopaque,
file: std.fs.File,
offset: std.io.Writer.Offset,
len: std.io.Writer.FileLen,
limit: std.io.Writer.Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
_ = context;
_ = file;
_ = offset;
_ = len;
_ = limit;
_ = headers_and_trailers;
_ = headers_len;
return error.NoSpaceLeft;


@@ -410,19 +410,19 @@ pub fn writeStructEndian(bw: *BufferedWriter, value: anytype, endian: std.builti
pub fn writeFile(
bw: *BufferedWriter,
file: std.fs.File,
offset: u64,
len: Writer.FileLen,
offset: Writer.Offset,
limit: Writer.Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
return passthru_writeFile(bw, file, offset, len, headers_and_trailers, headers_len);
return passthru_writeFile(bw, file, offset, limit, headers_and_trailers, headers_len);
}
fn passthru_writeFile(
context: ?*anyopaque,
file: std.fs.File,
offset: u64,
len: Writer.FileLen,
offset: Writer.Offset,
limit: Writer.Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
@@ -430,7 +430,7 @@ fn passthru_writeFile(
const buffer = bw.buffer;
if (buffer.len == 0) return track(
&bw.count,
try bw.unbuffered_writer.writeFile(file, offset, len, headers_and_trailers, headers_len),
try bw.unbuffered_writer.writeFile(file, offset, limit, headers_and_trailers, headers_len),
);
const start_end = bw.end;
const headers = headers_and_trailers[0..headers_len];
@@ -457,7 +457,7 @@ fn passthru_writeFile(
@memcpy(remaining_buffers_for_trailers[0..send_trailers_len], trailers[0..send_trailers_len]);
const send_headers_len = 1 + buffers_len;
const send_buffers = buffers[0 .. send_headers_len + send_trailers_len];
const n = try bw.unbuffered_writer.writeFile(file, offset, len, send_buffers, send_headers_len);
const n = try bw.unbuffered_writer.writeFile(file, offset, limit, send_buffers, send_headers_len);
if (n < end) {
@branchHint(.unlikely);
const remainder = buffer[n..end];
@@ -487,7 +487,7 @@ fn passthru_writeFile(
@memcpy(remaining_buffers[0..send_trailers_len], trailers[0..send_trailers_len]);
const send_headers_len = 1;
const send_buffers = buffers[0 .. send_headers_len + send_trailers_len];
const n = try bw.unbuffered_writer.writeFile(file, offset, len, send_buffers, send_headers_len);
const n = try bw.unbuffered_writer.writeFile(file, offset, limit, send_buffers, send_headers_len);
if (n < end) {
@branchHint(.unlikely);
const remainder = buffer[n..end];
@@ -500,26 +500,26 @@
}
pub const WriteFileOptions = struct {
offset: u64 = 0,
offset: Writer.Offset = .none,
/// If the size of the source file is known, it is likely that passing the
/// size here will save one syscall.
len: Writer.FileLen = .entire_file,
limit: Writer.Limit = .none,
/// Headers and trailers must be passed together so that in case `limit` is
/// zero, they can be forwarded directly to `Writer.VTable.writev`.
///
/// The parameter is mutable because this function needs to mutate the
/// fields in order to handle partial writes from `Writer.VTable.writeFile`.
headers_and_trailers: [][]const u8 = &.{},
/// The number of trailers is inferred from `headers_and_trailers.len -
/// headers_len`.
/// The number of trailers is inferred from
/// `headers_and_trailers.len - headers_len`.
headers_len: usize = 0,
};
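For example, a caller of `writeFileAll` (just below) copying a known-size region framed by one header and one trailer might look like this (a sketch; `bw`, `file`, and `file_size` are invented names):

    var bufs = [_][]const u8{ "HEADER\r\n", "\r\nTRAILER" };
    try bw.writeFileAll(file, .{
        .offset = .zero, // positional read; the file's seek position is untouched
        .limit = .init(file_size), // passing the known size likely saves a syscall
        .headers_and_trailers = &bufs,
        .headers_len = 1,
    });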
pub fn writeFileAll(bw: *BufferedWriter, file: std.fs.File, options: WriteFileOptions) anyerror!void {
const headers_and_trailers = options.headers_and_trailers;
const headers = headers_and_trailers[0..options.headers_len];
if (options.len == .zero) return writevAll(bw, headers_and_trailers);
if (options.len == .entire_file) {
if (options.limit == .zero) return writevAll(bw, headers_and_trailers);
if (options.limit == .none) {
// When reading the whole file, we cannot include the trailers in the
// call that reads from the file handle, because we have no way to
// determine whether a partial write is past the end of the file or
@@ -540,7 +540,7 @@ pub fn writeFileAll(bw: *BufferedWriter, file: std.fs.File, options: WriteFileOp
offset += n;
}
} else {
var len = options.len.int();
var len = options.limit.toInt().?;
var i: usize = 0;
var offset = options.offset;
while (true) {


@@ -48,11 +48,45 @@ pub const Status = packed struct(usize) {
};
pub const Limit = enum(usize) {
zero = 0,
none = std.math.maxInt(usize),
_,
pub fn min(l: Limit, int: usize) usize {
return @min(int, @intFromEnum(l));
/// `std.math.maxInt(usize)` is interpreted to mean "no limit".
pub fn init(n: usize) Limit {
return @enumFromInt(n);
}
pub fn min(l: Limit, n: usize) usize {
return @min(n, @intFromEnum(l));
}
pub fn slice(l: Limit, s: []u8) []u8 {
return s[0..min(l, s.len)];
}
pub fn toInt(l: Limit) ?usize {
return if (l == .none) null else @intFromEnum(l);
}
/// Reduces a slice to account for the limit, leaving room for one extra
/// byte above the limit, allowing for the use case of differentiating
/// between end-of-stream and reaching the limit.
pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 {
assert(non_empty_buffer.len >= 1);
return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)];
}
pub fn nonzero(l: Limit) bool {
return @intFromEnum(l) > 0;
}
/// Return a new limit reduced by `amount`, or `null`, indicating that the
/// limit would be exceeded.
pub fn subtract(l: Limit, amount: usize) ?Limit {
if (l == .none) return .none;
if (amount > @intFromEnum(l)) return null;
return @enumFromInt(@intFromEnum(l) - amount);
}
};
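A small sketch of how these helpers compose (values invented):

    const Limit = std.io.Reader.Limit;
    var buf: [16]u8 = undefined;
    const l: Limit = .init(4);
    std.debug.assert(l.slice(&buf).len == 4); // truncated to the limit
    std.debug.assert(l.slice1(&buf).len == 5); // one byte of headroom kept
    std.debug.assert(l.subtract(4).? == .zero); // exactly consumed
    std.debug.assert(l.subtract(5) == null); // would exceed the limit
    std.debug.assert(Limit.none.subtract(1000).? == .none); // no limit stays no limit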


@@ -31,9 +31,11 @@ pub const VTable = struct {
writeFile: *const fn (
ctx: ?*anyopaque,
file: std.fs.File,
/// If this is `none`, `file` will be streamed. Otherwise, it will be
/// read positionally without affecting the seek position.
offset: Offset,
/// When zero, it means copy until the end of the file is reached.
len: FileLen,
/// Maximum number of bytes to read from the file.
limit: Limit,
/// Headers and trailers must be passed together so that in case `limit` is
/// zero, they can be forwarded directly to `VTable.writev`.
headers_and_trailers: []const []const u8,
@@ -41,7 +43,10 @@
) anyerror!usize,
};
pub const Limit = std.io.Reader.Limit;
pub const Offset = enum(u64) {
zero = 0,
/// Indicates to read the file as a stream.
none = std.math.maxInt(u64),
_,
@@ -53,24 +58,7 @@
}
pub fn toInt(o: Offset) ?u64 {
if (o == .none) return null;
return @intFromEnum(o);
}
};
pub const FileLen = enum(u64) {
zero = 0,
entire_file = std.math.maxInt(u64),
_,
pub fn init(integer: u64) FileLen {
const result: FileLen = @enumFromInt(integer);
assert(result != .entire_file);
return result;
}
pub fn int(len: FileLen) u64 {
return @intFromEnum(len);
return if (o == .none) null else @intFromEnum(o);
}
};
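To illustrate the commit-message point that the offset is now a type rather than a bare u64 (a sketch; `init` is assumed to reject the reserved `none` value, mirroring the removed `FileLen.init`):

    const Offset = std.io.Writer.Offset;
    const positional: Offset = .init(4096); // read at byte 4096; seek position untouched
    std.debug.assert(positional.toInt().? == 4096);
    std.debug.assert(Offset.none.toInt() == null); // none: stream from the current seek position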
@@ -85,26 +73,26 @@ pub fn writeSplat(w: Writer, data: []const []const u8, splat: usize) anyerror!us
pub fn writeFile(
w: Writer,
file: std.fs.File,
offset: u64,
len: FileLen,
offset: Offset,
limit: Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
return w.vtable.writeFile(w.context, file, offset, len, headers_and_trailers, headers_len);
return w.vtable.writeFile(w.context, file, offset, limit, headers_and_trailers, headers_len);
}
pub fn unimplemented_writeFile(
context: ?*anyopaque,
file: std.fs.File,
offset: Offset,
len: FileLen,
limit: Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
_ = context;
_ = file;
_ = offset;
_ = len;
_ = limit;
_ = headers_and_trailers;
_ = headers_len;
return error.Unimplemented;
@@ -143,7 +131,7 @@ fn null_writeFile(
context: ?*anyopaque,
file: std.fs.File,
offset: Offset,
len: FileLen,
limit: Limit,
headers_and_trailers: []const []const u8,
headers_len: usize,
) anyerror!usize {
@@ -152,7 +140,7 @@
if (offset == .none) {
@panic("TODO seek the file forwards");
}
if (len == .entire_file) {
const limit_int = limit.toInt() orelse {
const headers = headers_and_trailers[0..headers_len];
for (headers) |bytes| n += bytes.len;
if (offset.toInt()) |off| {
@@ -162,9 +150,9 @@
return n;
}
@panic("TODO stream from file until eof, counting");
}
};
for (headers_and_trailers) |bytes| n += bytes.len;
return len.int() + n;
return limit_int + n;
}
test @"null" {


@@ -543,20 +543,18 @@ test isUnderscore {
try std.testing.expect(!isUnderscore("\\x5f"));
}
pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?usize) ![:0]u8 {
const source_code = input.readToEndAllocOptions(
gpa,
max_src_size,
size_hint,
.of(u8),
0,
) catch |err| switch (err) {
pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: usize) ![:0]u8 {
var buffer: std.ArrayListAlignedUnmanaged(u8, .@"2") = .empty;
defer buffer.deinit(gpa);
try buffer.ensureUnusedCapacity(gpa, size_hint);
input.readIntoArrayList(gpa, .init(max_src_size), .@"2", &buffer) catch |err| switch (err) {
error.ConnectionResetByPeer => unreachable,
error.ConnectionTimedOut => unreachable,
error.NotOpenForReading => unreachable,
else => |e| return e,
};
errdefer gpa.free(source_code);
// Detect unsupported file types with their Byte Order Mark
const unsupported_boms = [_][]const u8{
@@ -565,30 +563,23 @@ pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?
"\xfe\xff", // UTF-16 big endian
};
for (unsupported_boms) |bom| {
if (std.mem.startsWith(u8, source_code, bom)) {
if (std.mem.startsWith(u8, buffer.items, bom)) {
return error.UnsupportedEncoding;
}
}
// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
if (std.mem.startsWith(u8, source_code, "\xff\xfe")) {
if (source_code.len % 2 != 0) return error.InvalidEncoding;
// TODO: after wrangle-writer-buffering branch is merged,
// avoid this unnecessary allocation
const aligned_copy = try gpa.alloc(u16, source_code.len / 2);
defer gpa.free(aligned_copy);
@memcpy(std.mem.sliceAsBytes(aligned_copy), source_code);
const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(gpa, aligned_copy) catch |err| switch (err) {
if (std.mem.startsWith(u8, buffer.items, "\xff\xfe")) {
if (buffer.items.len % 2 != 0) return error.InvalidEncoding;
return std.unicode.utf16LeToUtf8AllocZ(gpa, std.mem.bytesAsSlice(u16, buffer.items)) catch |err| switch (err) {
error.DanglingSurrogateHalf => error.UnsupportedEncoding,
error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
else => |e| return e,
};
gpa.free(source_code);
return source_code_utf8;
}
return source_code;
return buffer.toOwnedSliceSentinel(gpa, 0);
}
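The allocation noted in the commit message is avoided because the buffer is 2-byte aligned from the start, so the UTF-16 payload can be reinterpreted in place rather than copied to an aligned scratch buffer first. A sketch of that reinterpretation (using `std.mem.bytesAsSlice`, as assumed in the call above; `gpa` assumed in scope):

    var buffer: std.ArrayListAlignedUnmanaged(u8, .@"2") = .empty;
    defer buffer.deinit(gpa);
    try buffer.appendSlice(gpa, "\xff\xfeh\x00i\x00"); // BOM + "hi" as UTF-16 LE bytes
    // Alignment 2 makes this reinterpretation valid; no copy required.
    const utf16 = std.mem.bytesAsSlice(u16, buffer.items);
    const utf8 = try std.unicode.utf16LeToUtf8AllocZ(gpa, utf16);
    defer gpa.free(utf8);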
pub fn printAstErrorsToStderr(gpa: Allocator, tree: Ast, path: []const u8, color: Color) !void {