From 73a8c9beaa63c48b6ddaeec8d2a67b239f0dec92 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Tue, 1 Sep 2020 18:48:43 +0200 Subject: [PATCH 1/4] std: Don't trust stat() size in readAllAlloc fns Some files such as the ones in /proc report a st_size of zero, try to read the file anyway if we hit that case. --- lib/std/fs.zig | 4 +--- lib/std/fs/file.zig | 28 +++++++++++++++++++++------- lib/std/fs/test.zig | 9 ++++----- src-self-hosted/main.zig | 3 ++- src-self-hosted/stage2.zig | 1 - 5 files changed, 28 insertions(+), 17 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 21a00eeb1d..9a44660570 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1454,9 +1454,7 @@ pub const Dir = struct { var file = try self.openFile(file_path, .{}); defer file.close(); - const stat_size = try file.getEndPos(); - - return file.readAllAllocOptions(allocator, stat_size, max_bytes, alignment, optional_sentinel); + return file.readAllAllocOptions(allocator, max_bytes, alignment, optional_sentinel); } pub const DeleteTreeError = error{ diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 6fb2385a85..c34e5f9437 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -365,8 +365,8 @@ pub const File = struct { /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. - pub fn readAllAlloc(self: File, allocator: *mem.Allocator, stat_size: u64, max_bytes: usize) ![]u8 { - return self.readAllAllocOptions(allocator, stat_size, max_bytes, @alignOf(u8), null); + pub fn readAllAlloc(self: File, allocator: *mem.Allocator, max_bytes: usize) ![]u8 { + return self.readAllAllocOptions(allocator, max_bytes, @alignOf(u8), null); } /// On success, caller owns returned buffer. @@ -375,19 +375,33 @@ pub const File = struct { pub fn readAllAllocOptions( self: File, allocator: *mem.Allocator, - stat_size: u64, max_bytes: usize, comptime alignment: u29, comptime optional_sentinel: ?u8, ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) { + const stat_size = try self.getEndPos(); const size = math.cast(usize, stat_size) catch math.maxInt(usize); if (size > max_bytes) return error.FileTooBig; - const buf = try allocator.allocWithOptions(u8, size, alignment, optional_sentinel); - errdefer allocator.free(buf); + // The file size returned by stat is used as hint to set the buffer + // size. If the reported size is zero, as it happens on Linux for files + // in /proc, a small buffer is allocated instead. + const initial_cap = (if (size > 0) size else 1024) + @boolToInt(optional_sentinel != null); + var array_list = try std.ArrayListAligned(u8, alignment).initCapacity(allocator, initial_cap); + defer array_list.deinit(); - try self.reader().readNoEof(buf); - return buf; + self.reader().readAllArrayList(&array_list, max_bytes) catch |err| switch (err) { + error.StreamTooLong => return error.FileTooBig, + else => |e| return e, + }; + + if (optional_sentinel) |sentinel| { + try array_list.append(sentinel); + const buf = array_list.toOwnedSlice(); + return buf[0 .. buf.len - 1 :sentinel]; + } else { + return array_list.toOwnedSlice(); + } } pub const ReadError = os.ReadError; diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 409a53b1a7..c567602dd7 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -188,30 +188,29 @@ test "readAllAlloc" { var file = try tmp_dir.dir.createFile("test_file", .{ .read = true }); defer file.close(); - const buf1 = try file.readAllAlloc(testing.allocator, 0, 1024); + const buf1 = try file.readAllAlloc(testing.allocator, 1024); defer testing.allocator.free(buf1); testing.expect(buf1.len == 0); const write_buf: []const u8 = "this is a test.\nthis is a test.\nthis is a test.\nthis is a test.\n"; try file.writeAll(write_buf); try file.seekTo(0); - const file_size = try file.getEndPos(); // max_bytes > file_size - const buf2 = try file.readAllAlloc(testing.allocator, file_size, 1024); + const buf2 = try file.readAllAlloc(testing.allocator, 1024); defer testing.allocator.free(buf2); testing.expectEqual(write_buf.len, buf2.len); testing.expect(std.mem.eql(u8, write_buf, buf2)); try file.seekTo(0); // max_bytes == file_size - const buf3 = try file.readAllAlloc(testing.allocator, file_size, write_buf.len); + const buf3 = try file.readAllAlloc(testing.allocator, write_buf.len); defer testing.allocator.free(buf3); testing.expectEqual(write_buf.len, buf3.len); testing.expect(std.mem.eql(u8, write_buf, buf3)); // max_bytes < file_size - testing.expectError(error.FileTooBig, file.readAllAlloc(testing.allocator, file_size, write_buf.len - 1)); + testing.expectError(error.FileTooBig, file.readAllAlloc(testing.allocator, write_buf.len - 1)); } test "directory operations on files" { diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index ac25cd2eb8..b6ccc8a218 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -742,6 +742,7 @@ const FmtError = error{ LinkQuotaExceeded, FileBusy, EndOfStream, + Unseekable, NotOpenForWriting, } || fs.File.OpenError; @@ -805,7 +806,7 @@ fn fmtPathFile( if (stat.kind == .Directory) return error.IsDir; - const source_code = source_file.readAllAlloc(fmt.gpa, stat.size, max_src_size) catch |err| switch (err) { + const source_code = source_file.readAllAlloc(fmt.gpa, max_src_size) catch |err| switch (err) { error.ConnectionResetByPeer => unreachable, error.ConnectionTimedOut => unreachable, error.NotOpenForReading => unreachable, diff --git a/src-self-hosted/stage2.zig b/src-self-hosted/stage2.zig index 30d2ea44db..45b8ad3073 100644 --- a/src-self-hosted/stage2.zig +++ b/src-self-hosted/stage2.zig @@ -615,7 +615,6 @@ export fn stage2_libc_parse(stage1_libc: *Stage2LibCInstallation, libc_file_z: [ error.NotOpenForWriting => unreachable, error.NotOpenForReading => unreachable, error.Unexpected => return .Unexpected, - error.EndOfStream => return .EndOfFile, error.IsDir => return .IsDir, error.ConnectionResetByPeer => unreachable, error.ConnectionTimedOut => unreachable, From 5f31d54064b204910c18667d0b68b8bf2b57cffb Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Wed, 2 Sep 2020 10:51:15 +0200 Subject: [PATCH 2/4] std: ArrayList.initCapacity now respects the specified cap Don't use the user-supplied cap as starting point for a resize. Doing so overallocates memory and thus negates the whole point of specifying a precise cap value. --- lib/std/array_list.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index a7432a30ae..f298d14631 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -46,7 +46,11 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type { /// Deinitialize with `deinit` or use `toOwnedSlice`. pub fn initCapacity(allocator: *Allocator, num: usize) !Self { var self = Self.init(allocator); - try self.ensureCapacity(num); + + const new_memory = try self.allocator.allocAdvanced(T, alignment, num, .at_least); + self.items.ptr = new_memory.ptr; + self.capacity = new_memory.len; + return self; } @@ -366,7 +370,11 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ /// Deinitialize with `deinit` or use `toOwnedSlice`. pub fn initCapacity(allocator: *Allocator, num: usize) !Self { var self = Self{}; - try self.ensureCapacity(allocator, num); + + const new_memory = try self.allocator.allocAdvanced(T, alignment, num, .at_least); + self.items.ptr = new_memory.ptr; + self.capacity = new_memory.len; + return self; } From 90743881cf10c4f90da4a8c187997e9eab4d17d5 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 4 Sep 2020 09:28:43 +0200 Subject: [PATCH 3/4] std: Minor changes to the fs module * Add a size_hint parameter to the read{toEnd,File}AllocOptions fns * Rename readAllAlloc{,Options} to readToEndAlloc{,Options} as they don't rewind the file before reading * Fix missing rewind in test case --- lib/std/fs.zig | 9 +++++++-- lib/std/fs/file.zig | 16 ++++++++++------ lib/std/fs/test.zig | 9 +++++---- src-self-hosted/Module.zig | 2 ++ src-self-hosted/main.zig | 8 +++++++- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 9a44660570..4005f90fcb 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1437,24 +1437,29 @@ pub const Dir = struct { /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. pub fn readFileAlloc(self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 { - return self.readFileAllocOptions(allocator, file_path, max_bytes, @alignOf(u8), null); + return self.readFileAllocOptions(allocator, file_path, max_bytes, null, @alignOf(u8), null); } /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. + /// If `size_hint` is specified the initial buffer size is calculated using + /// that value, otherwise the effective file size is used instead. /// Allows specifying alignment and a sentinel value. pub fn readFileAllocOptions( self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize, + size_hint: ?usize, comptime alignment: u29, comptime optional_sentinel: ?u8, ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) { var file = try self.openFile(file_path, .{}); defer file.close(); - return file.readAllAllocOptions(allocator, max_bytes, alignment, optional_sentinel); + const stat_size = size_hint orelse try file.getEndPos(); + + return file.readToEndAllocOptions(allocator, max_bytes, stat_size, alignment, optional_sentinel); } pub const DeleteTreeError = error{ diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index c34e5f9437..ef1b501ec3 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -363,25 +363,29 @@ pub const File = struct { try os.futimens(self.handle, ×); } + /// Reads all the bytes from the current position to the end of the file. /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. - pub fn readAllAlloc(self: File, allocator: *mem.Allocator, max_bytes: usize) ![]u8 { - return self.readAllAllocOptions(allocator, max_bytes, @alignOf(u8), null); + pub fn readToEndAlloc(self: File, allocator: *mem.Allocator, max_bytes: usize) ![]u8 { + return self.readToEndAllocOptions(allocator, max_bytes, null, @alignOf(u8), null); } + /// Reads all the bytes from the current position to the end of the file. /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. + /// If `size_hint` is specified the initial buffer size is calculated using + /// that value, otherwise an arbitrary value is used instead. /// Allows specifying alignment and a sentinel value. - pub fn readAllAllocOptions( + pub fn readToEndAllocOptions( self: File, allocator: *mem.Allocator, max_bytes: usize, + size_hint: ?usize, comptime alignment: u29, comptime optional_sentinel: ?u8, ) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) { - const stat_size = try self.getEndPos(); - const size = math.cast(usize, stat_size) catch math.maxInt(usize); - if (size > max_bytes) return error.FileTooBig; + // If no size hint is provided fall back to the size=0 code path + const size = size_hint orelse 0; // The file size returned by stat is used as hint to set the buffer // size. If the reported size is zero, as it happens on Linux for files diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index c567602dd7..a59bc46245 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -188,7 +188,7 @@ test "readAllAlloc" { var file = try tmp_dir.dir.createFile("test_file", .{ .read = true }); defer file.close(); - const buf1 = try file.readAllAlloc(testing.allocator, 1024); + const buf1 = try file.readToEndAlloc(testing.allocator, 1024); defer testing.allocator.free(buf1); testing.expect(buf1.len == 0); @@ -197,20 +197,21 @@ test "readAllAlloc" { try file.seekTo(0); // max_bytes > file_size - const buf2 = try file.readAllAlloc(testing.allocator, 1024); + const buf2 = try file.readToEndAlloc(testing.allocator, 1024); defer testing.allocator.free(buf2); testing.expectEqual(write_buf.len, buf2.len); testing.expect(std.mem.eql(u8, write_buf, buf2)); try file.seekTo(0); // max_bytes == file_size - const buf3 = try file.readAllAlloc(testing.allocator, write_buf.len); + const buf3 = try file.readToEndAlloc(testing.allocator, write_buf.len); defer testing.allocator.free(buf3); testing.expectEqual(write_buf.len, buf3.len); testing.expect(std.mem.eql(u8, write_buf, buf3)); + try file.seekTo(0); // max_bytes < file_size - testing.expectError(error.FileTooBig, file.readAllAlloc(testing.allocator, write_buf.len - 1)); + testing.expectError(error.FileTooBig, file.readToEndAlloc(testing.allocator, write_buf.len - 1)); } test "directory operations on files" { diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index c476c307d2..5cc0b3f892 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -595,6 +595,7 @@ pub const Scope = struct { module.gpa, self.sub_file_path, std.math.maxInt(u32), + null, 1, 0, ); @@ -697,6 +698,7 @@ pub const Scope = struct { module.gpa, self.sub_file_path, std.math.maxInt(u32), + null, 1, 0, ); diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index b6ccc8a218..6c56ef885b 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -806,7 +806,13 @@ fn fmtPathFile( if (stat.kind == .Directory) return error.IsDir; - const source_code = source_file.readAllAlloc(fmt.gpa, max_src_size) catch |err| switch (err) { + const source_code = source_file.readToEndAllocOptions( + fmt.gpa, + max_src_size, + stat.size, + @alignOf(u8), + null, + ) catch |err| switch (err) { error.ConnectionResetByPeer => unreachable, error.ConnectionTimedOut => unreachable, error.NotOpenForReading => unreachable, From 3c8e1bc25b6d122ce1295e2e33bb9f54ae801ec0 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 4 Sep 2020 12:48:36 +0200 Subject: [PATCH 4/4] std: Fix for 32bit systems --- lib/std/fs.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 4005f90fcb..a217fb3e9b 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1457,7 +1457,10 @@ pub const Dir = struct { var file = try self.openFile(file_path, .{}); defer file.close(); - const stat_size = size_hint orelse try file.getEndPos(); + // If the file size doesn't fit a usize it'll be certainly greater than + // `max_bytes` + const stat_size = size_hint orelse math.cast(usize, try file.getEndPos()) catch + return error.FileTooBig; return file.readToEndAllocOptions(allocator, max_bytes, stat_size, alignment, optional_sentinel); }