From 3158dc424e48d3da52d9a8f99cba65a4ef850f2e Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 5 Mar 2020 00:07:17 -0700 Subject: [PATCH 01/49] Partially implement cache hash API in zig --- lib/std/cache_hash.zig | 329 +++++++++++++++++++++++++++++++++++++++++ lib/std/std.zig | 1 + 2 files changed, 330 insertions(+) create mode 100644 lib/std/cache_hash.zig diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig new file mode 100644 index 0000000000..ea85cb6db3 --- /dev/null +++ b/lib/std/cache_hash.zig @@ -0,0 +1,329 @@ +const Blake3 = @import("crypto.zig").Blake3; +const fs = @import("fs.zig"); +const File = fs.File; +const base64 = @import("base64.zig"); +const ArrayList = @import("array_list.zig").ArrayList; +const debug = @import("debug.zig"); +const testing = @import("testing.zig"); +const mem = @import("mem.zig"); +const fmt = @import("fmt.zig"); +const Allocator = mem.Allocator; +const Buffer = @import("buffer.zig").Buffer; +const os = @import("os.zig"); + +const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +const base64_pad_char = '='; +const encoder = base64.Base64Encoder.init(base64_alphabet, base64_pad_char); +const decoder = base64.Base64Decoder.init(base64_alphabet, base64_pad_char); +const BIN_DIGEST_LEN = 32; + +pub const CacheHashFile = struct { + path: ?[]const u8, + stat: fs.File.Stat, + file_handle: os.fd_t, + bin_digest: [BIN_DIGEST_LEN]u8, + contents: ?[]const u8, + + pub fn deinit(self: *@This(), alloc: *Allocator) void { + if (self.path) |owned_slice| { + alloc.free(owned_slice); + self.path = null; + } + if (self.contents) |owned_slice| { + alloc.free(owned_slice); + self.contents = null; + } + } +}; + +pub const CacheHash = struct { + alloc: *Allocator, + blake3: Blake3, + manifest_dir: []const u8, + manifest_file_path: ?[]const u8, + manifest_file: ?File, + manifest_dirty: bool, + force_check_manifest: bool, + files: ArrayList(CacheHashFile), + b64_digest: ArrayList(u8), + + pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { + return CacheHash{ + .alloc = alloc, + .blake3 = Blake3.init(), + .manifest_dir = manifest_dir_path, + .manifest_file_path = null, + .manifest_file = null, + .manifest_dirty = false, + .force_check_manifest = false, + .files = ArrayList(CacheHashFile).init(alloc), + .b64_digest = ArrayList(u8).init(alloc), + }; + } + + pub fn cache_buf(self: *@This(), val: []const u8) !void { + debug.assert(self.manifest_file_path == null); + + var temp_buffer = try self.alloc.alloc(u8, val.len + 1); + defer self.alloc.free(temp_buffer); + + mem.copy(u8, temp_buffer, val); + temp_buffer[val.len] = 0; + + self.blake3.update(temp_buffer); + } + + pub fn cache_file(self: *@This(), file_path: []const u8) !void { + debug.assert(self.manifest_file_path == null); + + var cache_hash_file = try self.files.addOne(); + cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); + + try self.cache_buf(cache_hash_file.path.?); + } + + pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool { + debug.assert(self.manifest_file_path == null); + + var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; + self.blake3.final(&bin_digest); + + const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); + try self.b64_digest.resize(OUT_DIGEST_LEN); + encoder.encode(self.b64_digest.toSlice(), &bin_digest); + + if (self.files.toSlice().len == 0 and !self.force_check_manifest) { + try out_digest.resize(OUT_DIGEST_LEN); + mem.copy(u8, out_digest.toSlice(), self.b64_digest.toSlice()); + return true; + } + + self.blake3 = Blake3.init(); + self.blake3.update(&bin_digest); + + { + const manifest_file_path_slice = try fs.path.join(self.alloc, &[_][]const u8{ self.manifest_dir, self.b64_digest.toSlice() }); + var path_buf = ArrayList(u8).fromOwnedSlice(self.alloc, manifest_file_path_slice); + defer path_buf.deinit(); + try path_buf.appendSlice(".txt"); + + self.manifest_file_path = path_buf.toOwnedSlice(); + } + + const cwd = fs.cwd(); + + try cwd.makePath(self.manifest_dir); + + // TODO: Open file with a file lock + self.manifest_file = try cwd.createFile(self.manifest_file_path.?, .{ .read = true, .truncate = false }); + + // TODO: Figure out a good max value? + const file_contents = try self.manifest_file.?.inStream().stream.readAllAlloc(self.alloc, 16 * 1024); + defer self.alloc.free(file_contents); + + const input_file_count = self.files.len; + var any_file_changed = false; + var line_iter = mem.tokenize(file_contents, "\n"); + var idx: usize = 0; + while (line_iter.next()) |line| { + defer idx += 1; + + var cache_hash_file: *CacheHashFile = undefined; + if (idx < input_file_count) { + cache_hash_file = self.files.ptrAt(idx); + } else { + cache_hash_file = try self.files.addOne(); + cache_hash_file.path = null; + } + + var iter = mem.tokenize(line, " "); + const file_handle_str = iter.next() orelse return error.InvalidFormat; + const mtime_nsec_str = iter.next() orelse return error.InvalidFormat; + const digest_str = iter.next() orelse return error.InvalidFormat; + const file_path = iter.rest(); + + cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat; + cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; + decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; + + if (file_path.len == 0) { + return error.InvalidFormat; + } + if (cache_hash_file.path != null and !mem.eql(u8, file_path, cache_hash_file.path.?)) { + return error.InvalidFormat; + } + cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path); + + const this_file = cwd.openFile(cache_hash_file.path.?, .{ .read = true }) catch { + self.manifest_file.?.close(); + self.manifest_file = null; + return error.CacheUnavailable; + }; + defer this_file.close(); + cache_hash_file.stat = try this_file.stat(); + // TODO: check mtime + if (false) {} else { + self.manifest_dirty = true; + + // TODO: check for problematic timestamp + + var actual_digest: [32]u8 = undefined; + try hash_file(self.alloc, &actual_digest, &this_file); + + if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { + mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest); + // keep going until we have the input file digests + any_file_changed = true; + } + } + + if (!any_file_changed) { + self.blake3.update(&cache_hash_file.bin_digest); + } + } + + if (any_file_changed) { + // cache miss + // keep the manifest file open (TODO: with rw lock) + // reset the hash + self.blake3 = Blake3.init(); + self.blake3.update(&bin_digest); + try self.files.resize(input_file_count); + for (self.files.toSlice()) |file| { + self.blake3.update(&file.bin_digest); + } + return false; + } + + if (idx < input_file_count or idx == 0) { + self.manifest_dirty = true; + while (idx < input_file_count) : (idx += 1) { + var cache_hash_file = self.files.ptrAt(idx); + self.populate_file_hash(cache_hash_file) catch |err| { + self.manifest_file.?.close(); + self.manifest_file = null; + return error.CacheUnavailable; + }; + } + return false; + } + + try self.final(out_digest); + return true; + } + + pub fn populate_file_hash(self: *@This(), cache_hash_file: *CacheHashFile) !void { + debug.assert(cache_hash_file.path != null); + + const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{}); + defer this_file.close(); + + cache_hash_file.stat = try this_file.stat(); + + // TODO: check for problematic timestamp + + try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file); + self.blake3.update(&cache_hash_file.bin_digest); + } + + pub fn final(self: *@This(), out_digest: *ArrayList(u8)) !void { + debug.assert(self.manifest_file_path != null); + + var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; + self.blake3.final(&bin_digest); + + const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); + try out_digest.resize(OUT_DIGEST_LEN); + encoder.encode(out_digest.toSlice(), &bin_digest); + } + + pub fn write_manifest(self: *@This()) !void { + debug.assert(self.manifest_file_path != null); + + const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); + var encoded_digest = try Buffer.initSize(self.alloc, OUT_DIGEST_LEN); + defer encoded_digest.deinit(); + var contents = try Buffer.init(self.alloc, ""); + defer contents.deinit(); + + for (self.files.toSlice()) |file| { + encoder.encode(encoded_digest.toSlice(), &file.bin_digest); + try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path }); + } + + try self.manifest_file.?.seekTo(0); + try self.manifest_file.?.writeAll(contents.toSlice()); + } + + pub fn release(self: *@This()) void { + debug.assert(self.manifest_file_path != null); + + if (self.manifest_dirty) { + self.write_manifest() catch |err| { + debug.warn("Unable to write cache file '{}': {}\n", .{ self.manifest_file_path, err }); + }; + } + + self.manifest_file.?.close(); + if (self.manifest_file_path) |owned_slice| { + self.alloc.free(owned_slice); + } + for (self.files.toSlice()) |*file| { + file.deinit(self.alloc); + } + self.files.deinit(); + self.b64_digest.deinit(); + } +}; + +fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void { + var blake3 = Blake3.init(); + var in_stream = handle.inStream().stream; + + const contents = try handle.inStream().stream.readAllAlloc(alloc, 64 * 1024); + defer alloc.free(contents); + + blake3.update(contents); + + blake3.final(bin_digest); +} + +test "see if imported" { + const cwd = fs.cwd(); + + const temp_manifest_dir = "temp_manifest_dir"; + + try cwd.writeFile("test.txt", "Hello, world!\n"); + + var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32); + defer digest1.deinit(); + var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32); + defer digest2.deinit(); + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + try ch.cache_buf("1234"); + try ch.cache_file("test.txt"); + + // There should be nothing in the cache + debug.assert((try ch.hit(&digest1)) == false); + + try ch.final(&digest1); + } + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + try ch.cache_buf("1234"); + try ch.cache_file("test.txt"); + + // Cache hit! We just "built" the same file + debug.assert((try ch.hit(&digest2)) == true); + } + + debug.assert(mem.eql(u8, digest1.toSlice(), digest2.toSlice())); + + try cwd.deleteTree(temp_manifest_dir); +} diff --git a/lib/std/std.zig b/lib/std/std.zig index 376c200200..9920ca3378 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -31,6 +31,7 @@ pub const base64 = @import("base64.zig"); pub const build = @import("build.zig"); pub const builtin = @import("builtin.zig"); pub const c = @import("c.zig"); +pub const cache_hash = @import("cache_hash.zig"); pub const coff = @import("coff.zig"); pub const crypto = @import("crypto.zig"); pub const cstr = @import("cstr.zig"); From 8a77c1c637b07a2ff4f8f8981f9f732756cc38f6 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 5 Mar 2020 21:32:26 -0700 Subject: [PATCH 02/49] Add `cache` method; add support for caching integers --- lib/std/cache_hash.zig | 63 ++++++++++++------------------------------ 1 file changed, 18 insertions(+), 45 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index ea85cb6db3..710f853124 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -64,13 +64,26 @@ pub const CacheHash = struct { pub fn cache_buf(self: *@This(), val: []const u8) !void { debug.assert(self.manifest_file_path == null); - var temp_buffer = try self.alloc.alloc(u8, val.len + 1); - defer self.alloc.free(temp_buffer); + self.blake3.update(val); + } - mem.copy(u8, temp_buffer, val); - temp_buffer[val.len] = 0; + pub fn cache(self: *@This(), val: var) !void { + debug.assert(self.manifest_file_path == null); - self.blake3.update(temp_buffer); + const val_type = @TypeOf(val); + switch (@typeInfo(val_type)) { + .Int => |int_info| if (int_info.bits != 0 and int_info.bits % 8 == 0) { + const buf_len = @divExact(int_info.bits, 8); + var buf: [buf_len]u8 = undefined; + mem.writeIntNative(val_type, &buf, val); + self.blake3.update(&buf); + } else { + @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); + }, + else => @compileError("Unsupported type"), + } + + self.blake3.update(&[_]u8{0}); } pub fn cache_file(self: *@This(), file_path: []const u8) !void { @@ -287,43 +300,3 @@ fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void { blake3.final(bin_digest); } - -test "see if imported" { - const cwd = fs.cwd(); - - const temp_manifest_dir = "temp_manifest_dir"; - - try cwd.writeFile("test.txt", "Hello, world!\n"); - - var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32); - defer digest1.deinit(); - var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32); - defer digest2.deinit(); - - { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); - - try ch.cache_buf("1234"); - try ch.cache_file("test.txt"); - - // There should be nothing in the cache - debug.assert((try ch.hit(&digest1)) == false); - - try ch.final(&digest1); - } - { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); - - try ch.cache_buf("1234"); - try ch.cache_file("test.txt"); - - // Cache hit! We just "built" the same file - debug.assert((try ch.hit(&digest2)) == true); - } - - debug.assert(mem.eql(u8, digest1.toSlice(), digest2.toSlice())); - - try cwd.deleteTree(temp_manifest_dir); -} From de341b8fb86374ac62a36c987e11d1dd0b8c3358 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 5 Mar 2020 22:59:19 -0700 Subject: [PATCH 03/49] Fix memory leak in cache_hash --- lib/std/cache_hash.zig | 52 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 710f853124..2993d5a19d 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -130,9 +130,14 @@ pub const CacheHash = struct { // TODO: Open file with a file lock self.manifest_file = try cwd.createFile(self.manifest_file_path.?, .{ .read = true, .truncate = false }); + // create a buffer instead of using readAllAlloc + // See: https://github.com/ziglang/zig/issues/4656 + var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024); + defer file_buffer.deinit(); + // TODO: Figure out a good max value? - const file_contents = try self.manifest_file.?.inStream().stream.readAllAlloc(self.alloc, 16 * 1024); - defer self.alloc.free(file_contents); + try self.manifest_file.?.inStream().stream.readAllBuffer(&file_buffer, 16 * 1024); + const file_contents = file_buffer.toSliceConst(); const input_file_count = self.files.len; var any_file_changed = false; @@ -165,7 +170,6 @@ pub const CacheHash = struct { if (cache_hash_file.path != null and !mem.eql(u8, file_path, cache_hash_file.path.?)) { return error.InvalidFormat; } - cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path); const this_file = cwd.openFile(cache_hash_file.path.?, .{ .read = true }) catch { self.manifest_file.?.close(); @@ -300,3 +304,45 @@ fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void { blake3.final(bin_digest); } + +test "cache file and the recall it" { + const cwd = fs.cwd(); + + const temp_manifest_dir = "temp_manifest_dir"; + + try cwd.writeFile("test.txt", "Hello, world!\n"); + + var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32); + defer digest1.deinit(); + var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32); + defer digest2.deinit(); + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + try ch.cache(@as(u16, 1234)); + try ch.cache_buf("1234"); + try ch.cache_file("test.txt"); + + // There should be nothing in the cache + debug.assert((try ch.hit(&digest1)) == false); + + try ch.final(&digest1); + } + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + try ch.cache(@as(u16, 1234)); + try ch.cache_buf("1234"); + try ch.cache_file("test.txt"); + + // Cache hit! We just "built" the same file + debug.assert((try ch.hit(&digest2)) == true); + } + + debug.assert(mem.eql(u8, digest1.toSlice(), digest2.toSlice())); + + try cwd.deleteTree(temp_manifest_dir); +} From ce5b2286f1db3d5a01ce139b450140f21c9d9909 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 5 Mar 2020 23:22:55 -0700 Subject: [PATCH 04/49] Support caching bools; make caching values infallible --- lib/std/cache_hash.zig | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 2993d5a19d..22a1da3d85 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -61,13 +61,14 @@ pub const CacheHash = struct { }; } - pub fn cache_buf(self: *@This(), val: []const u8) !void { + pub fn cache_buf(self: *@This(), val: []const u8) void { debug.assert(self.manifest_file_path == null); self.blake3.update(val); + self.blake3.update(&[_]u8{0}); } - pub fn cache(self: *@This(), val: var) !void { + pub fn cache(self: *@This(), val: var) void { debug.assert(self.manifest_file_path == null); const val_type = @TypeOf(val); @@ -76,14 +77,17 @@ pub const CacheHash = struct { const buf_len = @divExact(int_info.bits, 8); var buf: [buf_len]u8 = undefined; mem.writeIntNative(val_type, &buf, val); - self.blake3.update(&buf); + self.cache_buf(&buf); } else { @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); }, + .Bool => { + var buf: [1]u8 = undefined; + buf[0] = if (val) 1 else 0; + self.blake3.update(&buf); + }, else => @compileError("Unsupported type"), } - - self.blake3.update(&[_]u8{0}); } pub fn cache_file(self: *@This(), file_path: []const u8) !void { @@ -92,7 +96,7 @@ pub const CacheHash = struct { var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - try self.cache_buf(cache_hash_file.path.?); + self.cache_buf(cache_hash_file.path.?); } pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool { @@ -321,8 +325,9 @@ test "cache file and the recall it" { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); defer ch.release(); - try ch.cache(@as(u16, 1234)); - try ch.cache_buf("1234"); + ch.cache(true); + ch.cache(@as(u16, 1234)); + ch.cache_buf("1234"); try ch.cache_file("test.txt"); // There should be nothing in the cache @@ -334,8 +339,9 @@ test "cache file and the recall it" { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); defer ch.release(); - try ch.cache(@as(u16, 1234)); - try ch.cache_buf("1234"); + ch.cache(true); + ch.cache(@as(u16, 1234)); + ch.cache_buf("1234"); try ch.cache_file("test.txt"); // Cache hit! We just "built" the same file From 8c8813a5cfa710478614cbb022481f830f199bad Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 20:17:23 -0700 Subject: [PATCH 05/49] Add filesystem base64 decoder --- lib/std/fs.zig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index a006d4ea05..c3e8d69b0d 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -55,6 +55,12 @@ pub const base64_encoder = base64.Base64Encoder.init( base64.standard_pad_char, ); +/// Base64, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. +pub const base64_decoder = base64.Base64Decoder.init( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", + base64.standard_pad_char, +); + /// Whether or not async file system syscalls need a dedicated thread because the operating /// system does not support non-blocking I/O on the file system. pub const need_async_thread = std.io.is_async and switch (builtin.os.tag) { From 86fe88bbcbbd14455ada878db75b387fb5f864fc Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 20:18:34 -0700 Subject: [PATCH 06/49] Use std.fs.base64_encoder in std.cache_hash --- lib/std/cache_hash.zig | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 22a1da3d85..d2e4e28a0d 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -11,10 +11,8 @@ const Allocator = mem.Allocator; const Buffer = @import("buffer.zig").Buffer; const os = @import("os.zig"); -const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -const base64_pad_char = '='; -const encoder = base64.Base64Encoder.init(base64_alphabet, base64_pad_char); -const decoder = base64.Base64Decoder.init(base64_alphabet, base64_pad_char); +const base64_encoder = fs.base64_encoder; +const base64_decoder = fs.base64_decoder; const BIN_DIGEST_LEN = 32; pub const CacheHashFile = struct { @@ -107,7 +105,7 @@ pub const CacheHash = struct { const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); try self.b64_digest.resize(OUT_DIGEST_LEN); - encoder.encode(self.b64_digest.toSlice(), &bin_digest); + base64_encoder.encode(self.b64_digest.toSlice(), &bin_digest); if (self.files.toSlice().len == 0 and !self.force_check_manifest) { try out_digest.resize(OUT_DIGEST_LEN); @@ -166,7 +164,7 @@ pub const CacheHash = struct { cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat; cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; - decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; + base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; if (file_path.len == 0) { return error.InvalidFormat; @@ -255,7 +253,7 @@ pub const CacheHash = struct { const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); try out_digest.resize(OUT_DIGEST_LEN); - encoder.encode(out_digest.toSlice(), &bin_digest); + base64_encoder.encode(out_digest.toSlice(), &bin_digest); } pub fn write_manifest(self: *@This()) !void { @@ -268,7 +266,7 @@ pub const CacheHash = struct { defer contents.deinit(); for (self.files.toSlice()) |file| { - encoder.encode(encoded_digest.toSlice(), &file.bin_digest); + base64_encoder.encode(encoded_digest.toSlice(), &file.bin_digest); try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path }); } From c8062321b3838b35bdf9931026cbea1f67ba6bc1 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 20:21:43 -0700 Subject: [PATCH 07/49] Use fs.File --- lib/std/cache_hash.zig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index d2e4e28a0d..bd5ec3af60 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -1,6 +1,5 @@ const Blake3 = @import("crypto.zig").Blake3; const fs = @import("fs.zig"); -const File = fs.File; const base64 = @import("base64.zig"); const ArrayList = @import("array_list.zig").ArrayList; const debug = @import("debug.zig"); @@ -39,7 +38,7 @@ pub const CacheHash = struct { blake3: Blake3, manifest_dir: []const u8, manifest_file_path: ?[]const u8, - manifest_file: ?File, + manifest_file: ?fs.File, manifest_dirty: bool, force_check_manifest: bool, files: ArrayList(CacheHashFile), @@ -295,7 +294,7 @@ pub const CacheHash = struct { } }; -fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const File) !void { +fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void { var blake3 = Blake3.init(); var in_stream = handle.inStream().stream; From 55a3925ab79adae174faa97ae132518541c549cb Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 20:23:15 -0700 Subject: [PATCH 08/49] Rename CacheHashFile -> File --- lib/std/cache_hash.zig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index bd5ec3af60..45cf6eb756 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -14,7 +14,7 @@ const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; const BIN_DIGEST_LEN = 32; -pub const CacheHashFile = struct { +pub const File = struct { path: ?[]const u8, stat: fs.File.Stat, file_handle: os.fd_t, @@ -41,7 +41,7 @@ pub const CacheHash = struct { manifest_file: ?fs.File, manifest_dirty: bool, force_check_manifest: bool, - files: ArrayList(CacheHashFile), + files: ArrayList(File), b64_digest: ArrayList(u8), pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { @@ -53,7 +53,7 @@ pub const CacheHash = struct { .manifest_file = null, .manifest_dirty = false, .force_check_manifest = false, - .files = ArrayList(CacheHashFile).init(alloc), + .files = ArrayList(File).init(alloc), .b64_digest = ArrayList(u8).init(alloc), }; } @@ -147,7 +147,7 @@ pub const CacheHash = struct { while (line_iter.next()) |line| { defer idx += 1; - var cache_hash_file: *CacheHashFile = undefined; + var cache_hash_file: *File = undefined; if (idx < input_file_count) { cache_hash_file = self.files.ptrAt(idx); } else { @@ -230,7 +230,7 @@ pub const CacheHash = struct { return true; } - pub fn populate_file_hash(self: *@This(), cache_hash_file: *CacheHashFile) !void { + pub fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { debug.assert(cache_hash_file.path != null); const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{}); From 27bf1f781b5246914ba2fbdf556dbf72bc926b17 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 21:02:38 -0700 Subject: [PATCH 09/49] Store fs.Dir instead of path to dir --- lib/std/cache_hash.zig | 46 +++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 45cf6eb756..7ec642d348 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -36,8 +36,7 @@ pub const File = struct { pub const CacheHash = struct { alloc: *Allocator, blake3: Blake3, - manifest_dir: []const u8, - manifest_file_path: ?[]const u8, + manifest_dir: fs.Dir, manifest_file: ?fs.File, manifest_dirty: bool, force_check_manifest: bool, @@ -45,11 +44,13 @@ pub const CacheHash = struct { b64_digest: ArrayList(u8), pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { + try fs.cwd().makePath(manifest_dir_path); + const manifest_dir = try fs.cwd().openDirTraverse(manifest_dir_path); + return CacheHash{ .alloc = alloc, .blake3 = Blake3.init(), - .manifest_dir = manifest_dir_path, - .manifest_file_path = null, + .manifest_dir = manifest_dir, .manifest_file = null, .manifest_dirty = false, .force_check_manifest = false, @@ -59,14 +60,14 @@ pub const CacheHash = struct { } pub fn cache_buf(self: *@This(), val: []const u8) void { - debug.assert(self.manifest_file_path == null); + debug.assert(self.manifest_file == null); self.blake3.update(val); self.blake3.update(&[_]u8{0}); } pub fn cache(self: *@This(), val: var) void { - debug.assert(self.manifest_file_path == null); + debug.assert(self.manifest_file == null); const val_type = @TypeOf(val); switch (@typeInfo(val_type)) { @@ -88,7 +89,7 @@ pub const CacheHash = struct { } pub fn cache_file(self: *@This(), file_path: []const u8) !void { - debug.assert(self.manifest_file_path == null); + debug.assert(self.manifest_file == null); var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); @@ -97,7 +98,7 @@ pub const CacheHash = struct { } pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool { - debug.assert(self.manifest_file_path == null); + debug.assert(self.manifest_file == null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); @@ -116,21 +117,12 @@ pub const CacheHash = struct { self.blake3.update(&bin_digest); { - const manifest_file_path_slice = try fs.path.join(self.alloc, &[_][]const u8{ self.manifest_dir, self.b64_digest.toSlice() }); - var path_buf = ArrayList(u8).fromOwnedSlice(self.alloc, manifest_file_path_slice); - defer path_buf.deinit(); - try path_buf.appendSlice(".txt"); + const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest.toSlice()}); + defer self.alloc.free(manifest_file_path); - self.manifest_file_path = path_buf.toOwnedSlice(); + self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false }); } - const cwd = fs.cwd(); - - try cwd.makePath(self.manifest_dir); - - // TODO: Open file with a file lock - self.manifest_file = try cwd.createFile(self.manifest_file_path.?, .{ .read = true, .truncate = false }); - // create a buffer instead of using readAllAlloc // See: https://github.com/ziglang/zig/issues/4656 var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024); @@ -172,7 +164,7 @@ pub const CacheHash = struct { return error.InvalidFormat; } - const this_file = cwd.openFile(cache_hash_file.path.?, .{ .read = true }) catch { + const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { self.manifest_file.?.close(); self.manifest_file = null; return error.CacheUnavailable; @@ -245,7 +237,7 @@ pub const CacheHash = struct { } pub fn final(self: *@This(), out_digest: *ArrayList(u8)) !void { - debug.assert(self.manifest_file_path != null); + debug.assert(self.manifest_file != null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); @@ -256,7 +248,7 @@ pub const CacheHash = struct { } pub fn write_manifest(self: *@This()) !void { - debug.assert(self.manifest_file_path != null); + debug.assert(self.manifest_file != null); const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); var encoded_digest = try Buffer.initSize(self.alloc, OUT_DIGEST_LEN); @@ -274,23 +266,21 @@ pub const CacheHash = struct { } pub fn release(self: *@This()) void { - debug.assert(self.manifest_file_path != null); + debug.assert(self.manifest_file != null); if (self.manifest_dirty) { self.write_manifest() catch |err| { - debug.warn("Unable to write cache file '{}': {}\n", .{ self.manifest_file_path, err }); + debug.warn("Unable to write cache file '{}': {}\n", .{ self.b64_digest, err }); }; } self.manifest_file.?.close(); - if (self.manifest_file_path) |owned_slice| { - self.alloc.free(owned_slice); - } for (self.files.toSlice()) |*file| { file.deinit(self.alloc); } self.files.deinit(); self.b64_digest.deinit(); + self.manifest_dir.close(); } }; From 4173dbdce907e39f376a08ce2f192655fc62b664 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 21:25:22 -0700 Subject: [PATCH 10/49] Rename `cache` functions to `add` --- lib/std/cache_hash.zig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 7ec642d348..1324a7ce79 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -59,14 +59,14 @@ pub const CacheHash = struct { }; } - pub fn cache_buf(self: *@This(), val: []const u8) void { + pub fn addSlice(self: *@This(), val: []const u8) void { debug.assert(self.manifest_file == null); self.blake3.update(val); self.blake3.update(&[_]u8{0}); } - pub fn cache(self: *@This(), val: var) void { + pub fn add(self: *@This(), val: var) void { debug.assert(self.manifest_file == null); const val_type = @TypeOf(val); @@ -75,7 +75,7 @@ pub const CacheHash = struct { const buf_len = @divExact(int_info.bits, 8); var buf: [buf_len]u8 = undefined; mem.writeIntNative(val_type, &buf, val); - self.cache_buf(&buf); + self.addSlice(&buf); } else { @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); }, @@ -94,7 +94,7 @@ pub const CacheHash = struct { var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - self.cache_buf(cache_hash_file.path.?); + self.addSlice(cache_hash_file.path.?); } pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool { @@ -312,9 +312,9 @@ test "cache file and the recall it" { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); defer ch.release(); - ch.cache(true); - ch.cache(@as(u16, 1234)); - ch.cache_buf("1234"); + ch.add(true); + ch.add(@as(u16, 1234)); + ch.addSlice("1234"); try ch.cache_file("test.txt"); // There should be nothing in the cache @@ -326,9 +326,9 @@ test "cache file and the recall it" { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); defer ch.release(); - ch.cache(true); - ch.cache(@as(u16, 1234)); - ch.cache_buf("1234"); + ch.add(true); + ch.add(@as(u16, 1234)); + ch.addSlice("1234"); try ch.cache_file("test.txt"); // Cache hit! We just "built" the same file From fde188aadcc8c7ff279c9365e38b1bec114af08b Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 21:40:33 -0700 Subject: [PATCH 11/49] Make type specific add functions Basically, move type specific code into their own functions instead of making `add` a giant function responsible for everything. --- lib/std/cache_hash.zig | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 1324a7ce79..fa2b2e08c0 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -66,24 +66,38 @@ pub const CacheHash = struct { self.blake3.update(&[_]u8{0}); } + pub fn addBool(self: *@This(), val: bool) void { + debug.assert(self.manifest_file == null); + self.blake3.update(&[_]u8{@boolToInt(val)}); + } + + pub fn addInt(self: *@This(), val: var) void { + debug.assert(self.manifest_file == null); + + switch (@typeInfo(@TypeOf(val))) { + .Int => |int_info| { + if (int_info.bits == 0 or int_info.bits % 8 != 0) { + @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); + } + + const buf_len = @divExact(int_info.bits, 8); + var buf: [buf_len]u8 = undefined; + mem.writeIntNative(@TypeOf(val), &buf, val); + self.addSlice(&buf); + + self.blake3.update(&[_]u8{0}); + }, + else => @compileError("Type must be an integer."), + } + } + pub fn add(self: *@This(), val: var) void { debug.assert(self.manifest_file == null); const val_type = @TypeOf(val); switch (@typeInfo(val_type)) { - .Int => |int_info| if (int_info.bits != 0 and int_info.bits % 8 == 0) { - const buf_len = @divExact(int_info.bits, 8); - var buf: [buf_len]u8 = undefined; - mem.writeIntNative(val_type, &buf, val); - self.addSlice(&buf); - } else { - @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); - }, - .Bool => { - var buf: [1]u8 = undefined; - buf[0] = if (val) 1 else 0; - self.blake3.update(&buf); - }, + .Int => self.addInt(val), + .Bool => self.addBool(val), else => @compileError("Unsupported type"), } } From 50cbf1f3aa636badec64bcd6d1905cd3c8d67c16 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 21:58:15 -0700 Subject: [PATCH 12/49] Add slice and array support to `add` method --- lib/std/cache_hash.zig | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index fa2b2e08c0..2b1d86a1e8 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -98,6 +98,21 @@ pub const CacheHash = struct { switch (@typeInfo(val_type)) { .Int => self.addInt(val), .Bool => self.addBool(val), + .Array => |array_info| if (array_info.child == u8) { + self.addSlice(val[0..]); + } else { + @compileError("Unsupported array type"); + }, + .Pointer => |ptr_info| switch (ptr_info.size) { + .Slice => if (ptr_info.child == u8) { + self.addSlice(val); + }, + .One => self.add(val.*), + else => { + @compileLog("Pointer type: ", ptr_info.size, ptr_info.child); + @compileError("Unsupported pointer type"); + }, + }, else => @compileError("Unsupported type"), } } @@ -328,7 +343,7 @@ test "cache file and the recall it" { ch.add(true); ch.add(@as(u16, 1234)); - ch.addSlice("1234"); + ch.add("1234"); try ch.cache_file("test.txt"); // There should be nothing in the cache @@ -342,7 +357,7 @@ test "cache file and the recall it" { ch.add(true); ch.add(@as(u16, 1234)); - ch.addSlice("1234"); + ch.add("1234"); try ch.cache_file("test.txt"); // Cache hit! We just "built" the same file From e75a6e514444849ca32de88511804c888137b9a1 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 21:59:58 -0700 Subject: [PATCH 13/49] Rename `cache_file` -> `addFile` --- lib/std/cache_hash.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 2b1d86a1e8..20d5300b7b 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -117,7 +117,7 @@ pub const CacheHash = struct { } } - pub fn cache_file(self: *@This(), file_path: []const u8) !void { + pub fn addFile(self: *@This(), file_path: []const u8) !void { debug.assert(self.manifest_file == null); var cache_hash_file = try self.files.addOne(); @@ -344,7 +344,7 @@ test "cache file and the recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.cache_file("test.txt"); + try ch.addFile("test.txt"); // There should be nothing in the cache debug.assert((try ch.hit(&digest1)) == false); @@ -358,7 +358,7 @@ test "cache file and the recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.cache_file("test.txt"); + try ch.addFile("test.txt"); // Cache hit! We just "built" the same file debug.assert((try ch.hit(&digest2)) == true); From 8e4b80522f70c7d5f636a967d61706cddc473f9e Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 22:19:43 -0700 Subject: [PATCH 14/49] Return base64 digest instead of using an out variable --- lib/std/cache_hash.zig | 51 ++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 20d5300b7b..4b4ae8f74c 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -13,6 +13,7 @@ const os = @import("os.zig"); const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; const BIN_DIGEST_LEN = 32; +const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); pub const File = struct { path: ?[]const u8, @@ -41,7 +42,7 @@ pub const CacheHash = struct { manifest_dirty: bool, force_check_manifest: bool, files: ArrayList(File), - b64_digest: ArrayList(u8), + b64_digest: [BASE64_DIGEST_LEN]u8, pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { try fs.cwd().makePath(manifest_dir_path); @@ -55,7 +56,7 @@ pub const CacheHash = struct { .manifest_dirty = false, .force_check_manifest = false, .files = ArrayList(File).init(alloc), - .b64_digest = ArrayList(u8).init(alloc), + .b64_digest = undefined, }; } @@ -126,27 +127,23 @@ pub const CacheHash = struct { self.addSlice(cache_hash_file.path.?); } - pub fn hit(self: *@This(), out_digest: *ArrayList(u8)) !bool { + pub fn hit(self: *@This()) !?[BASE64_DIGEST_LEN]u8 { debug.assert(self.manifest_file == null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); - const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); - try self.b64_digest.resize(OUT_DIGEST_LEN); - base64_encoder.encode(self.b64_digest.toSlice(), &bin_digest); + base64_encoder.encode(self.b64_digest[0..], &bin_digest); if (self.files.toSlice().len == 0 and !self.force_check_manifest) { - try out_digest.resize(OUT_DIGEST_LEN); - mem.copy(u8, out_digest.toSlice(), self.b64_digest.toSlice()); - return true; + return self.b64_digest; } self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); { - const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest.toSlice()}); + const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest}); defer self.alloc.free(manifest_file_path); self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false }); @@ -231,7 +228,7 @@ pub const CacheHash = struct { for (self.files.toSlice()) |file| { self.blake3.update(&file.bin_digest); } - return false; + return null; } if (idx < input_file_count or idx == 0) { @@ -244,11 +241,10 @@ pub const CacheHash = struct { return error.CacheUnavailable; }; } - return false; + return null; } - try self.final(out_digest); - return true; + return try self.final(); } pub fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { @@ -265,22 +261,22 @@ pub const CacheHash = struct { self.blake3.update(&cache_hash_file.bin_digest); } - pub fn final(self: *@This(), out_digest: *ArrayList(u8)) !void { + pub fn final(self: *@This()) ![BASE64_DIGEST_LEN]u8 { debug.assert(self.manifest_file != null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); - const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); - try out_digest.resize(OUT_DIGEST_LEN); - base64_encoder.encode(out_digest.toSlice(), &bin_digest); + var out_digest: [BASE64_DIGEST_LEN]u8 = undefined; + base64_encoder.encode(&out_digest, &bin_digest); + + return out_digest; } pub fn write_manifest(self: *@This()) !void { debug.assert(self.manifest_file != null); - const OUT_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); - var encoded_digest = try Buffer.initSize(self.alloc, OUT_DIGEST_LEN); + var encoded_digest = try Buffer.initSize(self.alloc, BASE64_DIGEST_LEN); defer encoded_digest.deinit(); var contents = try Buffer.init(self.alloc, ""); defer contents.deinit(); @@ -308,7 +304,6 @@ pub const CacheHash = struct { file.deinit(self.alloc); } self.files.deinit(); - self.b64_digest.deinit(); self.manifest_dir.close(); } }; @@ -332,10 +327,8 @@ test "cache file and the recall it" { try cwd.writeFile("test.txt", "Hello, world!\n"); - var digest1 = try ArrayList(u8).initCapacity(testing.allocator, 32); - defer digest1.deinit(); - var digest2 = try ArrayList(u8).initCapacity(testing.allocator, 32); - defer digest2.deinit(); + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; + var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); @@ -347,9 +340,9 @@ test "cache file and the recall it" { try ch.addFile("test.txt"); // There should be nothing in the cache - debug.assert((try ch.hit(&digest1)) == false); + debug.assert((try ch.hit()) == null); - try ch.final(&digest1); + digest1 = try ch.final(); } { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); @@ -361,10 +354,10 @@ test "cache file and the recall it" { try ch.addFile("test.txt"); // Cache hit! We just "built" the same file - debug.assert((try ch.hit(&digest2)) == true); + digest2 = (try ch.hit()).?; } - debug.assert(mem.eql(u8, digest1.toSlice(), digest2.toSlice())); + debug.assert(mem.eql(u8, digest1[0..], digest2[0..])); try cwd.deleteTree(temp_manifest_dir); } From 4f709d224ae02c7875fbba31094f3cbc44f56807 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 22:27:20 -0700 Subject: [PATCH 15/49] Make hash digest same size as in the c API --- lib/std/cache_hash.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 4b4ae8f74c..d0b1b81bd8 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -12,7 +12,7 @@ const os = @import("os.zig"); const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; -const BIN_DIGEST_LEN = 32; +const BIN_DIGEST_LEN = 48; const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); pub const File = struct { @@ -203,7 +203,7 @@ pub const CacheHash = struct { // TODO: check for problematic timestamp - var actual_digest: [32]u8 = undefined; + var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; try hash_file(self.alloc, &actual_digest, &this_file); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { From 061c1fd9abf61e5d7d1cf8cbb4aa24c1e4eadde0 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 22:42:59 -0700 Subject: [PATCH 16/49] Use `readAllAlloc` Now that the memory leak mentioned in #4656 has been fixed. --- lib/std/cache_hash.zig | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index d0b1b81bd8..dc9d5ffedd 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -149,14 +149,9 @@ pub const CacheHash = struct { self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false }); } - // create a buffer instead of using readAllAlloc - // See: https://github.com/ziglang/zig/issues/4656 - var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024); - defer file_buffer.deinit(); - // TODO: Figure out a good max value? - try self.manifest_file.?.inStream().stream.readAllBuffer(&file_buffer, 16 * 1024); - const file_contents = file_buffer.toSliceConst(); + const file_contents = try self.manifest_file.?.inStream().stream.readAllAlloc(self.alloc, 16 * 1024); + defer self.alloc.free(file_contents); const input_file_count = self.files.len; var any_file_changed = false; From 21d7430696158a3aa486d620bf7251f6134af9b9 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 6 Mar 2020 22:45:48 -0700 Subject: [PATCH 17/49] Replace ArrayList in write_manifest with an array --- lib/std/cache_hash.zig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index dc9d5ffedd..85e5d703cb 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -271,14 +271,13 @@ pub const CacheHash = struct { pub fn write_manifest(self: *@This()) !void { debug.assert(self.manifest_file != null); - var encoded_digest = try Buffer.initSize(self.alloc, BASE64_DIGEST_LEN); - defer encoded_digest.deinit(); + var encoded_digest: [BASE64_DIGEST_LEN]u8 = undefined; var contents = try Buffer.init(self.alloc, ""); defer contents.deinit(); for (self.files.toSlice()) |file| { - base64_encoder.encode(encoded_digest.toSlice(), &file.bin_digest); - try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path }); + base64_encoder.encode(encoded_digest[0..], &file.bin_digest); + try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest[0..], file.path }); } try self.manifest_file.?.seekTo(0); From fffd59e6c4c349993f847e7ecdeff25741a11810 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Sun, 8 Mar 2020 15:11:06 -0600 Subject: [PATCH 18/49] Remove file handle from CacheHash A file handle is not the same thing as an inode index number. Eventually the inode will be checked as well, but there needs to be a way to get the inode in `std` first. --- lib/std/cache_hash.zig | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 85e5d703cb..88b3e61cd8 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -18,7 +18,6 @@ const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); pub const File = struct { path: ?[]const u8, stat: fs.File.Stat, - file_handle: os.fd_t, bin_digest: [BIN_DIGEST_LEN]u8, contents: ?[]const u8, @@ -169,12 +168,10 @@ pub const CacheHash = struct { } var iter = mem.tokenize(line, " "); - const file_handle_str = iter.next() orelse return error.InvalidFormat; const mtime_nsec_str = iter.next() orelse return error.InvalidFormat; const digest_str = iter.next() orelse return error.InvalidFormat; const file_path = iter.rest(); - cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat; cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; @@ -191,11 +188,16 @@ pub const CacheHash = struct { return error.CacheUnavailable; }; defer this_file.close(); - cache_hash_file.stat = try this_file.stat(); - // TODO: check mtime - if (false) {} else { + + const actual_stat = try this_file.stat(); + const mtime_matches = actual_stat.mtime == cache_hash_file.stat.mtime; + + // TODO: check inode + if (!mtime_matches) { self.manifest_dirty = true; + cache_hash_file.stat = actual_stat; + // TODO: check for problematic timestamp var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; @@ -277,7 +279,7 @@ pub const CacheHash = struct { for (self.files.toSlice()) |file| { base64_encoder.encode(encoded_digest[0..], &file.bin_digest); - try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest[0..], file.path }); + try contents.print("{} {} {}\n", .{ file.stat.mtime, encoded_digest[0..], file.path }); } try self.manifest_file.?.seekTo(0); From 16c54990989fa8b25038536b04d2ec6cf99e2e1c Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Sun, 8 Mar 2020 15:13:40 -0600 Subject: [PATCH 19/49] Remove up files created in test at end of test --- lib/std/cache_hash.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 88b3e61cd8..bb5b4b710a 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -319,9 +319,10 @@ fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void test "cache file and the recall it" { const cwd = fs.cwd(); + const temp_file = "test.txt"; const temp_manifest_dir = "temp_manifest_dir"; - try cwd.writeFile("test.txt", "Hello, world!\n"); + try cwd.writeFile(temp_file, "Hello, world!\n"); var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; @@ -356,4 +357,5 @@ test "cache file and the recall it" { debug.assert(mem.eql(u8, digest1[0..], digest2[0..])); try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteFile(temp_file); } From 7917f25b0a8e8d8cafea3bacbb13a3c2b03b9a97 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 7 Apr 2020 20:12:21 -0600 Subject: [PATCH 20/49] Update cache_hash to zig master --- lib/std/cache_hash.zig | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index bb5b4b710a..c4b1f0f5d0 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -7,7 +7,6 @@ const testing = @import("testing.zig"); const mem = @import("mem.zig"); const fmt = @import("fmt.zig"); const Allocator = mem.Allocator; -const Buffer = @import("buffer.zig").Buffer; const os = @import("os.zig"); const base64_encoder = fs.base64_encoder; @@ -45,7 +44,7 @@ pub const CacheHash = struct { pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { try fs.cwd().makePath(manifest_dir_path); - const manifest_dir = try fs.cwd().openDirTraverse(manifest_dir_path); + const manifest_dir = try fs.cwd().openDir(manifest_dir_path, .{ .iterate = true }); return CacheHash{ .alloc = alloc, @@ -149,10 +148,10 @@ pub const CacheHash = struct { } // TODO: Figure out a good max value? - const file_contents = try self.manifest_file.?.inStream().stream.readAllAlloc(self.alloc, 16 * 1024); + const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.alloc, 16 * 1024); defer self.alloc.free(file_contents); - const input_file_count = self.files.len; + const input_file_count = self.files.items.len; var any_file_changed = false; var line_iter = mem.tokenize(file_contents, "\n"); var idx: usize = 0; @@ -274,16 +273,17 @@ pub const CacheHash = struct { debug.assert(self.manifest_file != null); var encoded_digest: [BASE64_DIGEST_LEN]u8 = undefined; - var contents = try Buffer.init(self.alloc, ""); + var contents = ArrayList(u8).init(self.alloc); + var outStream = contents.outStream(); defer contents.deinit(); for (self.files.toSlice()) |file| { base64_encoder.encode(encoded_digest[0..], &file.bin_digest); - try contents.print("{} {} {}\n", .{ file.stat.mtime, encoded_digest[0..], file.path }); + try outStream.print("{} {} {}\n", .{ file.stat.mtime, encoded_digest[0..], file.path }); } try self.manifest_file.?.seekTo(0); - try self.manifest_file.?.writeAll(contents.toSlice()); + try self.manifest_file.?.writeAll(contents.items); } pub fn release(self: *@This()) void { @@ -306,9 +306,8 @@ pub const CacheHash = struct { fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void { var blake3 = Blake3.init(); - var in_stream = handle.inStream().stream; - const contents = try handle.inStream().stream.readAllAlloc(alloc, 64 * 1024); + const contents = try handle.inStream().readAllAlloc(alloc, 64 * 1024); defer alloc.free(contents); blake3.update(contents); From dfb53beb52689519d395541d62519ff01c3d7785 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 7 Apr 2020 20:19:49 -0600 Subject: [PATCH 21/49] Check if inode matches inode from manifest --- lib/std/cache_hash.zig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index c4b1f0f5d0..6da64f9302 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -167,10 +167,12 @@ pub const CacheHash = struct { } var iter = mem.tokenize(line, " "); + const inode = iter.next() orelse return error.InvalidFormat; const mtime_nsec_str = iter.next() orelse return error.InvalidFormat; const digest_str = iter.next() orelse return error.InvalidFormat; const file_path = iter.rest(); + cache_hash_file.stat.inode = fmt.parseInt(os.ino_t, mtime_nsec_str, 10) catch return error.InvalidFormat; cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; @@ -189,10 +191,10 @@ pub const CacheHash = struct { defer this_file.close(); const actual_stat = try this_file.stat(); - const mtime_matches = actual_stat.mtime == cache_hash_file.stat.mtime; + const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime; + const inode_match = actual_stat.inode == cache_hash_file.stat.inode; - // TODO: check inode - if (!mtime_matches) { + if (!mtime_match or !inode_match) { self.manifest_dirty = true; cache_hash_file.stat = actual_stat; @@ -279,7 +281,7 @@ pub const CacheHash = struct { for (self.files.toSlice()) |file| { base64_encoder.encode(encoded_digest[0..], &file.bin_digest); - try outStream.print("{} {} {}\n", .{ file.stat.mtime, encoded_digest[0..], file.path }); + try outStream.print("{} {} {} {}\n", .{ file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path }); } try self.manifest_file.?.seekTo(0); From c88ece3679863cd55895bb9696d304c8d8754a4a Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 7 Apr 2020 20:41:46 -0600 Subject: [PATCH 22/49] Remove error union from CacheHash.final --- lib/std/cache_hash.zig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6da64f9302..38549312b6 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -242,7 +242,7 @@ pub const CacheHash = struct { return null; } - return try self.final(); + return self.final(); } pub fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { @@ -259,7 +259,8 @@ pub const CacheHash = struct { self.blake3.update(&cache_hash_file.bin_digest); } - pub fn final(self: *@This()) ![BASE64_DIGEST_LEN]u8 { + /// Returns a base64 encoded hash of the inputs. + pub fn final(self: *@This()) [BASE64_DIGEST_LEN]u8 { debug.assert(self.manifest_file != null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; @@ -340,7 +341,7 @@ test "cache file and the recall it" { // There should be nothing in the cache debug.assert((try ch.hit()) == null); - digest1 = try ch.final(); + digest1 = ch.final(); } { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); From 204aa7dc1a183bb5fba91328d4f436c6f91471af Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 7 Apr 2020 23:57:59 -0600 Subject: [PATCH 23/49] Remove unnecessary contents field from File It was causing a segfault on `mipsel` architecture, not sure why other architectures weren't affected. --- lib/std/cache_hash.zig | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 38549312b6..f63db444cf 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -18,17 +18,12 @@ pub const File = struct { path: ?[]const u8, stat: fs.File.Stat, bin_digest: [BIN_DIGEST_LEN]u8, - contents: ?[]const u8, pub fn deinit(self: *@This(), alloc: *Allocator) void { if (self.path) |owned_slice| { alloc.free(owned_slice); self.path = null; } - if (self.contents) |owned_slice| { - alloc.free(owned_slice); - self.contents = null; - } } }; @@ -232,7 +227,7 @@ pub const CacheHash = struct { if (idx < input_file_count or idx == 0) { self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { - var cache_hash_file = self.files.ptrAt(idx); + var cache_hash_file = &self.files.items[idx]; self.populate_file_hash(cache_hash_file) catch |err| { self.manifest_file.?.close(); self.manifest_file = null; @@ -318,7 +313,7 @@ fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void blake3.final(bin_digest); } -test "cache file and the recall it" { +test "cache file and then recall it" { const cwd = fs.cwd(); const temp_file = "test.txt"; From 73d2747084e92b2907ecf71f180a955ec411975e Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Sat, 11 Apr 2020 15:24:20 -0600 Subject: [PATCH 24/49] Open file with exclusive lock --- lib/std/cache_hash.zig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index f63db444cf..c4610480f3 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -139,7 +139,11 @@ pub const CacheHash = struct { const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest}); defer self.alloc.free(manifest_file_path); - self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false }); + self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = .Exclusive, + }); } // TODO: Figure out a good max value? @@ -213,7 +217,7 @@ pub const CacheHash = struct { if (any_file_changed) { // cache miss - // keep the manifest file open (TODO: with rw lock) + // keep the manifest file open // reset the hash self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); From 67d6432d10a081338f719eff4a9dfc9a5eb5b457 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Sat, 11 Apr 2020 16:01:17 -0600 Subject: [PATCH 25/49] Check for problematic timestamps --- lib/std/cache_hash.zig | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index c4610480f3..85a9c13f7e 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -8,6 +8,7 @@ const mem = @import("mem.zig"); const fmt = @import("fmt.zig"); const Allocator = mem.Allocator; const os = @import("os.zig"); +const time = @import("time.zig"); const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; @@ -198,7 +199,10 @@ pub const CacheHash = struct { cache_hash_file.stat = actual_stat; - // TODO: check for problematic timestamp + if (is_problematic_timestamp(cache_hash_file.stat.mtime)) { + cache_hash_file.stat.mtime = 0; + cache_hash_file.stat.inode = 0; + } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; try hash_file(self.alloc, &actual_digest, &this_file); @@ -252,7 +256,10 @@ pub const CacheHash = struct { cache_hash_file.stat = try this_file.stat(); - // TODO: check for problematic timestamp + if (is_problematic_timestamp(cache_hash_file.stat.mtime)) { + cache_hash_file.stat.mtime = 0; + cache_hash_file.stat.inode = 0; + } try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file); self.blake3.update(&cache_hash_file.bin_digest); @@ -317,6 +324,16 @@ fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void blake3.final(bin_digest); } +/// If the wall clock time, rounded to the same precision as the +/// mtime, is equal to the mtime, then we cannot rely on this mtime +/// yet. We will instead save an mtime value that indicates the hash +/// must be unconditionally computed. +fn is_problematic_timestamp(file_mtime_ns: i64) bool { + const now_ms = time.milliTimestamp(); + const file_mtime_ms = @divFloor(file_mtime_ns, time.millisecond); + return now_ms == file_mtime_ms; +} + test "cache file and then recall it" { const cwd = fs.cwd(); @@ -360,3 +377,13 @@ test "cache file and then recall it" { try cwd.deleteTree(temp_manifest_dir); try cwd.deleteFile(temp_file); } + +test "give problematic timestamp" { + const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond); + debug.assert(is_problematic_timestamp(now_ns)); +} + +test "give nonproblematic timestamp" { + const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond) - 1000; + debug.assert(!is_problematic_timestamp(now_ns)); +} From d770dae1b8ad385d47ec716ab23eca8ca92c31d5 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 7 Apr 2020 23:57:19 -0600 Subject: [PATCH 26/49] Add documentation to CacheHash API --- lib/std/cache_hash.zig | 73 ++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 85a9c13f7e..e62da0a530 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -54,6 +54,7 @@ pub const CacheHash = struct { }; } + /// Record a slice of bytes as an dependency of the process being cached pub fn addSlice(self: *@This(), val: []const u8) void { debug.assert(self.manifest_file == null); @@ -61,57 +62,23 @@ pub const CacheHash = struct { self.blake3.update(&[_]u8{0}); } - pub fn addBool(self: *@This(), val: bool) void { - debug.assert(self.manifest_file == null); - self.blake3.update(&[_]u8{@boolToInt(val)}); - } - - pub fn addInt(self: *@This(), val: var) void { - debug.assert(self.manifest_file == null); - - switch (@typeInfo(@TypeOf(val))) { - .Int => |int_info| { - if (int_info.bits == 0 or int_info.bits % 8 != 0) { - @compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice."); - } - - const buf_len = @divExact(int_info.bits, 8); - var buf: [buf_len]u8 = undefined; - mem.writeIntNative(@TypeOf(val), &buf, val); - self.addSlice(&buf); - - self.blake3.update(&[_]u8{0}); - }, - else => @compileError("Type must be an integer."), - } - } - + /// Convert the input value into bytes and record it as a dependency of the + /// process being cached pub fn add(self: *@This(), val: var) void { debug.assert(self.manifest_file == null); - const val_type = @TypeOf(val); - switch (@typeInfo(val_type)) { - .Int => self.addInt(val), - .Bool => self.addBool(val), - .Array => |array_info| if (array_info.child == u8) { - self.addSlice(val[0..]); - } else { - @compileError("Unsupported array type"); - }, - .Pointer => |ptr_info| switch (ptr_info.size) { - .Slice => if (ptr_info.child == u8) { - self.addSlice(val); - }, - .One => self.add(val.*), - else => { - @compileLog("Pointer type: ", ptr_info.size, ptr_info.child); - @compileError("Unsupported pointer type"); - }, - }, - else => @compileError("Unsupported type"), - } + const valPtr = switch (@typeInfo(@TypeOf(val))) { + .Int => &val, + .Pointer => val, + else => &val, + }; + + self.addSlice(mem.asBytes(valPtr)); } + /// Add a file as a dependency of process being cached. When `CacheHash.hit` is + /// called, the file's contents will be checked to ensure that it matches + /// the contents from previous times. pub fn addFile(self: *@This(), file_path: []const u8) !void { debug.assert(self.manifest_file == null); @@ -121,6 +88,14 @@ pub const CacheHash = struct { self.addSlice(cache_hash_file.path.?); } + /// Check the cache to see if the input exists in it. If it exists, a base64 encoding + /// of it's hash will be returned; otherwise, null will be returned. + /// + /// This function will also acquire an exclusive lock to the manifest file. This means + /// that a process holding a CacheHash will block any other process attempting to + /// acquire the lock. + /// + /// The lock on the manifest file is released when `CacheHash.release` is called. pub fn hit(self: *@This()) !?[BASE64_DIGEST_LEN]u8 { debug.assert(self.manifest_file == null); @@ -269,6 +244,12 @@ pub const CacheHash = struct { pub fn final(self: *@This()) [BASE64_DIGEST_LEN]u8 { debug.assert(self.manifest_file != null); + // We don't close the manifest file yet, because we want to + // keep it locked until the API user is done using it. + // We also don't write out the manifest yet, because until + // cache_release is called we still might be working on creating + // the artifacts to cache. + var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); From af730c64bd760a2fee7418b5d4b8912e3aec4eed Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 19:07:27 -0600 Subject: [PATCH 27/49] Put base64 alphabet into a named constant --- lib/std/fs.zig | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index c3e8d69b0d..99383277a7 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -49,17 +49,13 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { else => @compileError("Unsupported OS"), }; -/// Base64, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. -pub const base64_encoder = base64.Base64Encoder.init( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", - base64.standard_pad_char, -); +pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -/// Base64, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. -pub const base64_decoder = base64.Base64Decoder.init( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", - base64.standard_pad_char, -); +/// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. +pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, base64.standard_pad_char); + +/// Base64 decoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. +pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, base64.standard_pad_char); /// Whether or not async file system syscalls need a dedicated thread because the operating /// system does not support non-blocking I/O on the file system. From 05edfe983ce59c1fe1b1c652065651714ce59c52 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 19:33:02 -0600 Subject: [PATCH 28/49] Add `addFilePost` and `addFilePostFetch` functions --- lib/std/cache_hash.zig | 45 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index e62da0a530..b00f11c272 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -180,7 +180,8 @@ pub const CacheHash = struct { } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; - try hash_file(self.alloc, &actual_digest, &this_file); + const contents = try hash_file(self.alloc, &actual_digest, &this_file); + self.alloc.free(contents); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest); @@ -211,7 +212,7 @@ pub const CacheHash = struct { self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { var cache_hash_file = &self.files.items[idx]; - self.populate_file_hash(cache_hash_file) catch |err| { + const contents = self.populate_file_hash(cache_hash_file) catch |err| { self.manifest_file.?.close(); self.manifest_file = null; return error.CacheUnavailable; @@ -223,7 +224,7 @@ pub const CacheHash = struct { return self.final(); } - pub fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { + fn populate_file_hash_fetch(self: *@This(), otherAlloc: *mem.Allocator, cache_hash_file: *File) ![]u8 { debug.assert(cache_hash_file.path != null); const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{}); @@ -236,8 +237,38 @@ pub const CacheHash = struct { cache_hash_file.stat.inode = 0; } - try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file); + const contents = try hash_file(otherAlloc, &cache_hash_file.bin_digest, &this_file); self.blake3.update(&cache_hash_file.bin_digest); + + return contents; + } + + fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { + const contents = try self.populate_file_hash_fetch(self.alloc, cache_hash_file); + self.alloc.free(contents); + } + + /// Add a file as a dependency of process being cached, after the initial hash has been + /// calculated. Returns the contents of the file, allocated with the given allocator. + pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8) ![]u8 { + debug.assert(self.manifest_file != null); + + var cache_hash_file = try self.files.addOne(); + cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); + + const contents = self.populate_file_hash_fetch(otherAlloc, cache_hash_file) catch |err| { + self.manifest_file.close(); + return err; + }; + + return contents; + } + + /// Add a file as a dependency of process being cached, after the initial hash has been + /// calculated. + pub fn addFilePost(self: *@This(), file_path: []const u8) !void { + const contents = try self.addFilePostFetch(self.alloc, file_path); + self.alloc.free(contents); } /// Returns a base64 encoded hash of the inputs. @@ -294,15 +325,17 @@ pub const CacheHash = struct { } }; -fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void { +/// Hash the file, and return the contents as an array +fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) ![]u8 { var blake3 = Blake3.init(); const contents = try handle.inStream().readAllAlloc(alloc, 64 * 1024); - defer alloc.free(contents); blake3.update(contents); blake3.final(bin_digest); + + return contents; } /// If the wall clock time, rounded to the same precision as the From d457919ff56d9b2044cb9064dd12a32481b656bf Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 21:27:20 -0600 Subject: [PATCH 29/49] Make CacheHash cleanup consistent (always call `release`) Instead of releasing the manifest file when an error occurs, it is only released when when `CacheHash.release` is called. This maps better to what a zig user expects when they do `defer cache_hash.release()`. --- lib/std/cache_hash.zig | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index b00f11c272..d44e3e2d73 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -159,8 +159,6 @@ pub const CacheHash = struct { } const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { - self.manifest_file.?.close(); - self.manifest_file = null; return error.CacheUnavailable; }; defer this_file.close(); @@ -213,8 +211,6 @@ pub const CacheHash = struct { while (idx < input_file_count) : (idx += 1) { var cache_hash_file = &self.files.items[idx]; const contents = self.populate_file_hash(cache_hash_file) catch |err| { - self.manifest_file.?.close(); - self.manifest_file = null; return error.CacheUnavailable; }; } @@ -256,12 +252,7 @@ pub const CacheHash = struct { var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - const contents = self.populate_file_hash_fetch(otherAlloc, cache_hash_file) catch |err| { - self.manifest_file.close(); - return err; - }; - - return contents; + return try self.populate_file_hash_fetch(otherAlloc, cache_hash_file); } /// Add a file as a dependency of process being cached, after the initial hash has been @@ -317,6 +308,7 @@ pub const CacheHash = struct { } self.manifest_file.?.close(); + for (self.files.toSlice()) |*file| { file.deinit(self.alloc); } From b67a9f2281be6712b299a4dd8bd747d8a5fda882 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 21:34:40 -0600 Subject: [PATCH 30/49] Switch to using `testing.expect*` in tests --- lib/std/cache_hash.zig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index d44e3e2d73..b44aaac6f3 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -358,10 +358,10 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.addFile("test.txt"); + try ch.addFile(temp_file); // There should be nothing in the cache - debug.assert((try ch.hit()) == null); + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); digest1 = ch.final(); } @@ -372,13 +372,13 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.addFile("test.txt"); + try ch.addFile(temp_file); // Cache hit! We just "built" the same file digest2 = (try ch.hit()).?; } - debug.assert(mem.eql(u8, digest1[0..], digest2[0..])); + testing.expectEqual(digest1, digest2); try cwd.deleteTree(temp_manifest_dir); try cwd.deleteFile(temp_file); @@ -386,10 +386,10 @@ test "cache file and then recall it" { test "give problematic timestamp" { const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond); - debug.assert(is_problematic_timestamp(now_ns)); + testing.expect(is_problematic_timestamp(now_ns)); } test "give nonproblematic timestamp" { const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond) - 1000; - debug.assert(!is_problematic_timestamp(now_ns)); + testing.expect(!is_problematic_timestamp(now_ns)); } From 4254d389d3b21e3c7613c327810b47483b92329b Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 21:37:35 -0600 Subject: [PATCH 31/49] Add test checking file changes invalidate cache --- lib/std/cache_hash.zig | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index b44aaac6f3..71bd2dab93 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -393,3 +393,48 @@ test "give nonproblematic timestamp" { const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond) - 1000; testing.expect(!is_problematic_timestamp(now_ns)); } + +test "check that changing a file makes cache fail" { + const cwd = fs.cwd(); + + const temp_file = "cache_hash_change_file_test.txt"; + const temp_manifest_dir = "cache_hash_change_file_manifest_dir"; + + try cwd.writeFile(temp_file, "Hello, world!\n"); + + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; + var digest2: [BASE64_DIGEST_LEN]u8 = undefined; + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + ch.add("1234"); + try ch.addFile(temp_file); + + // There should be nothing in the cache + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + + digest1 = ch.final(); + } + + try cwd.writeFile(temp_file, "Hello, world; but updated!\n"); + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + ch.add("1234"); + try ch.addFile(temp_file); + + // A file that we depend on has been updated, so the cache should not contain an entry for it + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + + digest2 = ch.final(); + } + + testing.expect(!mem.eql(u8, digest1[0..], digest2[0..])); + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteFile(temp_file); +} From 967b9825a7b57586cd34f51d9b915505ffbd455d Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 21:39:34 -0600 Subject: [PATCH 32/49] Add "no file inputs" test It checks whether the cache will respond correctly to inputs that don't initially depend on filesystem state. In that case, we have to check for the existence of a manifest file, instead of relying on reading the list of entries to tell us if the cache is invalid. --- lib/std/cache_hash.zig | 64 +++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 71bd2dab93..f57f8cd146 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -34,7 +34,6 @@ pub const CacheHash = struct { manifest_dir: fs.Dir, manifest_file: ?fs.File, manifest_dirty: bool, - force_check_manifest: bool, files: ArrayList(File), b64_digest: [BASE64_DIGEST_LEN]u8, @@ -48,7 +47,6 @@ pub const CacheHash = struct { .manifest_dir = manifest_dir, .manifest_file = null, .manifest_dirty = false, - .force_check_manifest = false, .files = ArrayList(File).init(alloc), .b64_digest = undefined, }; @@ -104,22 +102,37 @@ pub const CacheHash = struct { base64_encoder.encode(self.b64_digest[0..], &bin_digest); - if (self.files.toSlice().len == 0 and !self.force_check_manifest) { - return self.b64_digest; - } - self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); - { - const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest}); - defer self.alloc.free(manifest_file_path); + const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest}); + defer self.alloc.free(manifest_file_path); + if (self.files.items.len != 0) { self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false, .lock = .Exclusive, }); + } else { + // If there are no file inputs, we check if the manifest file exists instead of + // comparing the hashes on the files used for the cached item + self.manifest_file = self.manifest_dir.openFile(manifest_file_path, .{ + .read = true, + .write = true, + .lock = .Exclusive, + }) catch |err| switch (err) { + error.FileNotFound => { + self.manifest_dirty = true; + self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = .Exclusive, + }); + return null; + }, + else => |e| return e, + }; } // TODO: Figure out a good max value? @@ -206,7 +219,7 @@ pub const CacheHash = struct { return null; } - if (idx < input_file_count or idx == 0) { + if (idx < input_file_count) { self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { var cache_hash_file = &self.files.items[idx]; @@ -438,3 +451,34 @@ test "check that changing a file makes cache fail" { try cwd.deleteTree(temp_manifest_dir); try cwd.deleteFile(temp_file); } + +test "no file inputs" { + const cwd = fs.cwd(); + const temp_manifest_dir = "no_file_inputs_manifest_dir"; + defer cwd.deleteTree(temp_manifest_dir) catch unreachable; + + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; + var digest2: [BASE64_DIGEST_LEN]u8 = undefined; + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + ch.add("1234"); + + // There should be nothing in the cache + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + + digest1 = ch.final(); + } + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release(); + + ch.add("1234"); + + digest2 = (try ch.hit()).?; + } + + testing.expectEqual(digest1, digest2); +} From e7657f2938e9061a546c5ad726a9575ad6a8eba3 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 21:51:20 -0600 Subject: [PATCH 33/49] Make `CacheHash.release` return an error If a user doesn't care that the manifest failed to be written, they can simply ignore it. The program will still work; that particular cache item will simply not be cached. --- lib/std/cache_hash.zig | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index f57f8cd146..4a5f9afff1 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -311,13 +311,17 @@ pub const CacheHash = struct { try self.manifest_file.?.writeAll(contents.items); } - pub fn release(self: *@This()) void { + /// Releases the manifest file and frees any memory the CacheHash was using. + /// `CacheHash.hit` must be called first. + /// + /// Will also attempt to write to the manifest file if the manifest is dirty. + /// Writing to the manifest file is the only way that this file can return an + /// error. + pub fn release(self: *@This()) !void { debug.assert(self.manifest_file != null); if (self.manifest_dirty) { - self.write_manifest() catch |err| { - debug.warn("Unable to write cache file '{}': {}\n", .{ self.b64_digest, err }); - }; + try self.write_manifest(); } self.manifest_file.?.close(); @@ -366,7 +370,7 @@ test "cache file and then recall it" { { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add(true); ch.add(@as(u16, 1234)); @@ -380,7 +384,7 @@ test "cache file and then recall it" { } { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add(true); ch.add(@as(u16, 1234)); @@ -420,7 +424,7 @@ test "check that changing a file makes cache fail" { { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add("1234"); try ch.addFile(temp_file); @@ -435,7 +439,7 @@ test "check that changing a file makes cache fail" { { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add("1234"); try ch.addFile(temp_file); @@ -462,7 +466,7 @@ test "no file inputs" { { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add("1234"); @@ -473,7 +477,7 @@ test "no file inputs" { } { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release(); + defer ch.release() catch unreachable; ch.add("1234"); From b429f4607c320118f5e0443605b29dda39521204 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Tue, 14 Apr 2020 22:17:55 -0600 Subject: [PATCH 34/49] Make `addFilePost*` functions' documentation more clear --- lib/std/cache_hash.zig | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 4a5f9afff1..bff7a09c1a 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -258,7 +258,11 @@ pub const CacheHash = struct { } /// Add a file as a dependency of process being cached, after the initial hash has been - /// calculated. Returns the contents of the file, allocated with the given allocator. + /// calculated. This is useful for processes that don't know the all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. + /// + /// Returns the contents of the file, allocated with the given allocator. pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8) ![]u8 { debug.assert(self.manifest_file != null); @@ -269,7 +273,9 @@ pub const CacheHash = struct { } /// Add a file as a dependency of process being cached, after the initial hash has been - /// calculated. + /// calculated. This is useful for processes that don't know the all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. pub fn addFilePost(self: *@This(), file_path: []const u8) !void { const contents = try self.addFilePostFetch(self.alloc, file_path); self.alloc.free(contents); From 4d62d97076a10cc6371fb6cbbfec8c906611a72b Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Wed, 15 Apr 2020 19:45:23 -0600 Subject: [PATCH 35/49] Update code using deprecated ArrayList APIs --- lib/std/cache_hash.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index bff7a09c1a..6fa3d7b5db 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -148,7 +148,7 @@ pub const CacheHash = struct { var cache_hash_file: *File = undefined; if (idx < input_file_count) { - cache_hash_file = self.files.ptrAt(idx); + cache_hash_file = &self.files.items[idx]; } else { cache_hash_file = try self.files.addOne(); cache_hash_file.path = null; @@ -213,7 +213,7 @@ pub const CacheHash = struct { self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); try self.files.resize(input_file_count); - for (self.files.toSlice()) |file| { + for (self.files.items) |file| { self.blake3.update(&file.bin_digest); } return null; @@ -308,7 +308,7 @@ pub const CacheHash = struct { var outStream = contents.outStream(); defer contents.deinit(); - for (self.files.toSlice()) |file| { + for (self.files.items) |file| { base64_encoder.encode(encoded_digest[0..], &file.bin_digest); try outStream.print("{} {} {} {}\n", .{ file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path }); } @@ -332,7 +332,7 @@ pub const CacheHash = struct { self.manifest_file.?.close(); - for (self.files.toSlice()) |*file| { + for (self.files.items) |*file| { file.deinit(self.alloc); } self.files.deinit(); From f13c67bcfed1c509abd13e83ae36479b793583d2 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Wed, 15 Apr 2020 20:13:26 -0600 Subject: [PATCH 36/49] Return an index from `CacheHash.addFile` This makes it possible for the user to retrieve the contents of the file without running into data races. --- lib/std/cache_hash.zig | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6fa3d7b5db..6b3d6d7ba2 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -19,12 +19,17 @@ pub const File = struct { path: ?[]const u8, stat: fs.File.Stat, bin_digest: [BIN_DIGEST_LEN]u8, + contents: ?[]const u8 = null, pub fn deinit(self: *@This(), alloc: *Allocator) void { if (self.path) |owned_slice| { alloc.free(owned_slice); self.path = null; } + if (self.contents) |contents| { + alloc.free(contents); + self.contents = null; + } } }; @@ -77,13 +82,23 @@ pub const CacheHash = struct { /// Add a file as a dependency of process being cached. When `CacheHash.hit` is /// called, the file's contents will be checked to ensure that it matches /// the contents from previous times. - pub fn addFile(self: *@This(), file_path: []const u8) !void { + /// + /// Returns the index of the entry in the `CacheHash.files` ArrayList. You can use it + /// to access the contents of the file after calling `CacheHash.hit()` like so: + /// + /// ``` + /// var file_contents = cache_hash.files.items[file_index].contents.?; + /// ``` + pub fn addFile(self: *@This(), file_path: []const u8) !usize { debug.assert(self.manifest_file == null); + const idx = self.files.items.len; var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); self.addSlice(cache_hash_file.path.?); + + return idx; } /// Check the cache to see if the input exists in it. If it exists, a base64 encoding @@ -191,8 +206,7 @@ pub const CacheHash = struct { } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; - const contents = try hash_file(self.alloc, &actual_digest, &this_file); - self.alloc.free(contents); + cache_hash_file.contents = try hash_file(self.alloc, &actual_digest, &this_file); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest); @@ -253,8 +267,7 @@ pub const CacheHash = struct { } fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { - const contents = try self.populate_file_hash_fetch(self.alloc, cache_hash_file); - self.alloc.free(contents); + cache_hash_file.contents = try self.populate_file_hash_fetch(self.alloc, cache_hash_file); } /// Add a file as a dependency of process being cached, after the initial hash has been @@ -381,7 +394,7 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.addFile(temp_file); + _ = try ch.addFile(temp_file); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); @@ -395,7 +408,7 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - try ch.addFile(temp_file); + _ = try ch.addFile(temp_file); // Cache hit! We just "built" the same file digest2 = (try ch.hit()).?; @@ -433,7 +446,7 @@ test "check that changing a file makes cache fail" { defer ch.release() catch unreachable; ch.add("1234"); - try ch.addFile(temp_file); + _ = try ch.addFile(temp_file); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); @@ -448,7 +461,7 @@ test "check that changing a file makes cache fail" { defer ch.release() catch unreachable; ch.add("1234"); - try ch.addFile(temp_file); + _ = try ch.addFile(temp_file); // A file that we depend on has been updated, so the cache should not contain an entry for it testing.expectEqual(@as(?[64]u8, null), try ch.hit()); From 0fa89dc51d6bb92f36ceb1399e07aa6f7c1dbd2b Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 17:04:13 -0600 Subject: [PATCH 37/49] Fix read from null pointer in CacheHash.hit It occured when the manifest file was manually edited to include an extra file. Now it will simply copy the file name in the manifest file --- lib/std/cache_hash.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6b3d6d7ba2..aa7ae6896d 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -186,6 +186,10 @@ pub const CacheHash = struct { return error.InvalidFormat; } + if (cache_hash_file.path == null) { + cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path); + } + const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { return error.CacheUnavailable; }; From 1ffff1bb181da785b63bb3ed6accf23874cb07f3 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 17:06:03 -0600 Subject: [PATCH 38/49] Add test case for fix in previous commit --- lib/std/cache_hash.zig | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index aa7ae6896d..6624ff724a 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -509,3 +509,55 @@ test "no file inputs" { testing.expectEqual(digest1, digest2); } + +test "manifest file with extra line does not cause null pointer exeception" { + const cwd = fs.cwd(); + + const temp_file1 = "cache_hash_remove_file_test1.txt"; + const temp_file2 = "cache_hash_remove_file_test2.txt"; + const temp_manifest_dir = "cache_hash_remove_file_manifest_dir"; + + try cwd.writeFile(temp_file1, "Hello, world!\n"); + try cwd.writeFile(temp_file2, "Hello world the second!\n"); + + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; + var digest2: [BASE64_DIGEST_LEN]u8 = undefined; + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release() catch unreachable; + + ch.add("1234"); + _ = try ch.addFile(temp_file1); + _ = try ch.addFile(temp_file2); + + // There should be nothing in the cache + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + + digest1 = ch.final(); + } + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release() catch unreachable; + + ch.add("1234"); + _ = try ch.addFile(temp_file1); + _ = try ch.addFile(temp_file2); + { + // Remove an input file from the cache hash. + // We still have to add the input file, or else the initial cache + // hash will be different, and a different manifest file checked + const chf = ch.files.orderedRemove(1); + testing.allocator.free(chf.path.?); + } + + // A file that we depend on has been updated, so the cache should not contain an entry for it + digest2 = (try ch.hit()).?; + } + + testing.expect(mem.eql(u8, digest1[0..], digest2[0..])); + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteFile(temp_file1); + try cwd.deleteFile(temp_file2); +} From 2c59f95e87979163e746cee2783d58d2f54e7da6 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 17:11:45 -0600 Subject: [PATCH 39/49] Don't use `iterate` when opening manifest directory --- lib/std/cache_hash.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6624ff724a..39a7f65350 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -44,7 +44,7 @@ pub const CacheHash = struct { pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { try fs.cwd().makePath(manifest_dir_path); - const manifest_dir = try fs.cwd().openDir(manifest_dir_path, .{ .iterate = true }); + const manifest_dir = try fs.cwd().openDir(manifest_dir_path, .{}); return CacheHash{ .alloc = alloc, From 42007307bed84ba691b86e17b6e414eef352e94c Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 17:12:29 -0600 Subject: [PATCH 40/49] Make if statement more idiomatic --- lib/std/cache_hash.zig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 39a7f65350..6fea1b3e2e 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -182,8 +182,10 @@ pub const CacheHash = struct { if (file_path.len == 0) { return error.InvalidFormat; } - if (cache_hash_file.path != null and !mem.eql(u8, file_path, cache_hash_file.path.?)) { - return error.InvalidFormat; + if (cache_hash_file.path) |p| { + if (!mem.eql(u8, file_path, p)) { + return error.InvalidFormat; + } } if (cache_hash_file.path == null) { From e6a37ed94193faf6450a0888c23168a83e914955 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 19:47:04 -0600 Subject: [PATCH 41/49] Change null pointer test to `addFilePost` test --- lib/std/cache_hash.zig | 45 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6fea1b3e2e..20d2323370 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -232,7 +232,13 @@ pub const CacheHash = struct { // reset the hash self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); + + // Remove files not in the initial hash + for (self.files.items[input_file_count..]) |*file| { + file.deinit(self.alloc); + } try self.files.resize(input_file_count); + for (self.files.items) |file| { self.blake3.update(&file.bin_digest); } @@ -512,18 +518,19 @@ test "no file inputs" { testing.expectEqual(digest1, digest2); } -test "manifest file with extra line does not cause null pointer exeception" { +test "CacheHashes with files added after initial hash work" { const cwd = fs.cwd(); - const temp_file1 = "cache_hash_remove_file_test1.txt"; - const temp_file2 = "cache_hash_remove_file_test2.txt"; - const temp_manifest_dir = "cache_hash_remove_file_manifest_dir"; + const temp_file1 = "cache_hash_post_file_test1.txt"; + const temp_file2 = "cache_hash_post_file_test2.txt"; + const temp_manifest_dir = "cache_hash_post_file_manifest_dir"; try cwd.writeFile(temp_file1, "Hello, world!\n"); try cwd.writeFile(temp_file2, "Hello world the second!\n"); var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; + var digest3: [BASE64_DIGEST_LEN]u8 = undefined; { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); @@ -531,11 +538,12 @@ test "manifest file with extra line does not cause null pointer exeception" { ch.add("1234"); _ = try ch.addFile(temp_file1); - _ = try ch.addFile(temp_file2); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + _ = try ch.addFilePost(temp_file2); + digest1 = ch.final(); } { @@ -544,20 +552,31 @@ test "manifest file with extra line does not cause null pointer exeception" { ch.add("1234"); _ = try ch.addFile(temp_file1); - _ = try ch.addFile(temp_file2); - { - // Remove an input file from the cache hash. - // We still have to add the input file, or else the initial cache - // hash will be different, and a different manifest file checked - const chf = ch.files.orderedRemove(1); - testing.allocator.free(chf.path.?); - } // A file that we depend on has been updated, so the cache should not contain an entry for it digest2 = (try ch.hit()).?; } + // Modify the file added after initial hash + try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); + + { + var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); + defer ch.release() catch unreachable; + + ch.add("1234"); + _ = try ch.addFile(temp_file1); + + // A file that we depend on has been updated, so the cache should not contain an entry for it + testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + + _ = try ch.addFilePost(temp_file2); + + digest3 = ch.final(); + } + testing.expect(mem.eql(u8, digest1[0..], digest2[0..])); + testing.expect(!mem.eql(u8, digest1[0..], digest3[0..])); try cwd.deleteTree(temp_manifest_dir); try cwd.deleteFile(temp_file1); From be69e8e871d26973c644cfad5225ae9e337179c9 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 19:54:40 -0600 Subject: [PATCH 42/49] Remove non-null assertion in `CacheHash.release()` People using the API as intended would never trigger this assertion anyway, but if someone has a non standard use case, I see no reason to make the program panic. --- lib/std/cache_hash.zig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 20d2323370..963614fa6b 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -349,14 +349,14 @@ pub const CacheHash = struct { /// Writing to the manifest file is the only way that this file can return an /// error. pub fn release(self: *@This()) !void { - debug.assert(self.manifest_file != null); + if (self.manifest_file) |file| { + if (self.manifest_dirty) { + try self.write_manifest(); + } - if (self.manifest_dirty) { - try self.write_manifest(); + file.close(); } - self.manifest_file.?.close(); - for (self.files.items) |*file| { file.deinit(self.alloc); } From 5a1c6a362743882ba2923570c24f77f555a39504 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Thu, 30 Apr 2020 20:00:26 -0600 Subject: [PATCH 43/49] Set manifest's maximum size to Andrew's recommendation --- lib/std/cache_hash.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 963614fa6b..bab72f8852 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -15,6 +15,8 @@ const base64_decoder = fs.base64_decoder; const BIN_DIGEST_LEN = 48; const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); +const MANIFEST_FILE_SIZE_MAX = 50 * 1024 * 1024; + pub const File = struct { path: ?[]const u8, stat: fs.File.Stat, @@ -150,8 +152,7 @@ pub const CacheHash = struct { }; } - // TODO: Figure out a good max value? - const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.alloc, 16 * 1024); + const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.alloc, MANIFEST_FILE_SIZE_MAX); defer self.alloc.free(file_contents); const input_file_count = self.files.items.len; From c3c332c9ecf8ab982ca1b704a54b7d1b439b520d Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Fri, 1 May 2020 23:06:10 -0600 Subject: [PATCH 44/49] Add max_file_size argument --- lib/std/cache_hash.zig | 72 +++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index bab72f8852..17de195422 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -19,6 +19,7 @@ const MANIFEST_FILE_SIZE_MAX = 50 * 1024 * 1024; pub const File = struct { path: ?[]const u8, + max_file_size: ?usize, stat: fs.File.Stat, bin_digest: [BIN_DIGEST_LEN]u8, contents: ?[]const u8 = null, @@ -85,18 +86,23 @@ pub const CacheHash = struct { /// called, the file's contents will be checked to ensure that it matches /// the contents from previous times. /// + /// Max file size will be used to determine the amount of space to the file contents + /// are allowed to take up in memory. If max_file_size is null, then the contents + /// will not be loaded into memory. + /// /// Returns the index of the entry in the `CacheHash.files` ArrayList. You can use it /// to access the contents of the file after calling `CacheHash.hit()` like so: /// /// ``` /// var file_contents = cache_hash.files.items[file_index].contents.?; /// ``` - pub fn addFile(self: *@This(), file_path: []const u8) !usize { + pub fn addFile(self: *@This(), file_path: []const u8, max_file_size: ?usize) !usize { debug.assert(self.manifest_file == null); const idx = self.files.items.len; var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); + cache_hash_file.max_file_size = max_file_size; self.addSlice(cache_hash_file.path.?); @@ -168,6 +174,7 @@ pub const CacheHash = struct { } else { cache_hash_file = try self.files.addOne(); cache_hash_file.path = null; + cache_hash_file.max_file_size = null; } var iter = mem.tokenize(line, " "); @@ -213,7 +220,7 @@ pub const CacheHash = struct { } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; - cache_hash_file.contents = try hash_file(self.alloc, &actual_digest, &this_file); + cache_hash_file.contents = try hash_file(self.alloc, &actual_digest, &this_file, cache_hash_file.max_file_size); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest); @@ -260,7 +267,7 @@ pub const CacheHash = struct { return self.final(); } - fn populate_file_hash_fetch(self: *@This(), otherAlloc: *mem.Allocator, cache_hash_file: *File) ![]u8 { + fn populate_file_hash_fetch(self: *@This(), otherAlloc: *mem.Allocator, cache_hash_file: *File) !?[]u8 { debug.assert(cache_hash_file.path != null); const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{}); @@ -273,7 +280,7 @@ pub const CacheHash = struct { cache_hash_file.stat.inode = 0; } - const contents = try hash_file(otherAlloc, &cache_hash_file.bin_digest, &this_file); + const contents = try hash_file(otherAlloc, &cache_hash_file.bin_digest, &this_file, cache_hash_file.max_file_size); self.blake3.update(&cache_hash_file.bin_digest); return contents; @@ -289,13 +296,15 @@ pub const CacheHash = struct { /// will need to be recompiled if the imported file is changed. /// /// Returns the contents of the file, allocated with the given allocator. - pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8) ![]u8 { + pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8, max_file_size_opt: ?usize) !?[]u8 { debug.assert(self.manifest_file != null); var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - return try self.populate_file_hash_fetch(otherAlloc, cache_hash_file); + const contents = try self.populate_file_hash_fetch(otherAlloc, cache_hash_file); + + return contents; } /// Add a file as a dependency of process being cached, after the initial hash has been @@ -303,8 +312,7 @@ pub const CacheHash = struct { /// are depended on ahead of time. For example, a source file that can import other files /// will need to be recompiled if the imported file is changed. pub fn addFilePost(self: *@This(), file_path: []const u8) !void { - const contents = try self.addFilePostFetch(self.alloc, file_path); - self.alloc.free(contents); + _ = try self.addFilePostFetch(self.alloc, file_path, null); } /// Returns a base64 encoded hash of the inputs. @@ -367,16 +375,30 @@ pub const CacheHash = struct { }; /// Hash the file, and return the contents as an array -fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) ![]u8 { +fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File, max_file_size_opt: ?usize) !?[]u8 { var blake3 = Blake3.init(); + var in_stream = handle.inStream(); - const contents = try handle.inStream().readAllAlloc(alloc, 64 * 1024); + if (max_file_size_opt) |max_file_size| { + const contents = try in_stream.readAllAlloc(alloc, max_file_size); - blake3.update(contents); + blake3.update(contents); - blake3.final(bin_digest); + blake3.final(bin_digest); - return contents; + return contents; + } else { + var buf: [1024]u8 = undefined; + + while (true) { + const bytes_read = try in_stream.read(buf[0..]); + if (bytes_read == 0) break; + blake3.update(buf[0..bytes_read]); + } + + blake3.final(bin_digest); + return null; + } } /// If the wall clock time, rounded to the same precision as the @@ -407,7 +429,7 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - _ = try ch.addFile(temp_file); + _ = try ch.addFile(temp_file, null); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); @@ -421,7 +443,7 @@ test "cache file and then recall it" { ch.add(true); ch.add(@as(u16, 1234)); ch.add("1234"); - _ = try ch.addFile(temp_file); + _ = try ch.addFile(temp_file, null); // Cache hit! We just "built" the same file digest2 = (try ch.hit()).?; @@ -448,8 +470,10 @@ test "check that changing a file makes cache fail" { const temp_file = "cache_hash_change_file_test.txt"; const temp_manifest_dir = "cache_hash_change_file_manifest_dir"; + const original_temp_file_contents = "Hello, world!\n"; + const updated_temp_file_contents = "Hello, world; but updated!\n"; - try cwd.writeFile(temp_file, "Hello, world!\n"); + try cwd.writeFile(temp_file, original_temp_file_contents); var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; @@ -459,26 +483,30 @@ test "check that changing a file makes cache fail" { defer ch.release() catch unreachable; ch.add("1234"); - _ = try ch.addFile(temp_file); + const temp_file_idx = try ch.addFile(temp_file, 100); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); + digest1 = ch.final(); } - try cwd.writeFile(temp_file, "Hello, world; but updated!\n"); + try cwd.writeFile(temp_file, updated_temp_file_contents); { var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); defer ch.release() catch unreachable; ch.add("1234"); - _ = try ch.addFile(temp_file); + const temp_file_idx = try ch.addFile(temp_file, 100); // A file that we depend on has been updated, so the cache should not contain an entry for it testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expect(mem.eql(u8, updated_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); + digest2 = ch.final(); } @@ -538,7 +566,7 @@ test "CacheHashes with files added after initial hash work" { defer ch.release() catch unreachable; ch.add("1234"); - _ = try ch.addFile(temp_file1); + _ = try ch.addFile(temp_file1, null); // There should be nothing in the cache testing.expectEqual(@as(?[64]u8, null), try ch.hit()); @@ -552,7 +580,7 @@ test "CacheHashes with files added after initial hash work" { defer ch.release() catch unreachable; ch.add("1234"); - _ = try ch.addFile(temp_file1); + _ = try ch.addFile(temp_file1, null); // A file that we depend on has been updated, so the cache should not contain an entry for it digest2 = (try ch.hit()).?; @@ -566,7 +594,7 @@ test "CacheHashes with files added after initial hash work" { defer ch.release() catch unreachable; ch.add("1234"); - _ = try ch.addFile(temp_file1); + _ = try ch.addFile(temp_file1, null); // A file that we depend on has been updated, so the cache should not contain an entry for it testing.expectEqual(@as(?[64]u8, null), try ch.hit()); From 72716ecc3a1f236fc46d754fa7387317c40aade5 Mon Sep 17 00:00:00 2001 From: LeRoyce Pearson Date: Sat, 2 May 2020 00:00:32 -0600 Subject: [PATCH 45/49] Fix improper initialization of CacheHashFiles --- lib/std/cache_hash.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 17de195422..a383666713 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -103,6 +103,7 @@ pub const CacheHash = struct { var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); cache_hash_file.max_file_size = max_file_size; + cache_hash_file.contents = null; self.addSlice(cache_hash_file.path.?); @@ -175,6 +176,7 @@ pub const CacheHash = struct { cache_hash_file = try self.files.addOne(); cache_hash_file.path = null; cache_hash_file.max_file_size = null; + cache_hash_file.contents = null; } var iter = mem.tokenize(line, " "); @@ -301,6 +303,8 @@ pub const CacheHash = struct { var cache_hash_file = try self.files.addOne(); cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); + cache_hash_file.max_file_size = max_file_size_opt; + cache_hash_file.contents = null; const contents = try self.populate_file_hash_fetch(otherAlloc, cache_hash_file); From 6d5ec184ab1a1b8c155714801f7d6cb7ea6f5b8f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 25 May 2020 15:02:02 -0400 Subject: [PATCH 46/49] stage2 parser: heuristics to pre-allocate token arrays throughput: 72.2 MiB/s => 75.3 MiB/s --- lib/std/zig/parse.zig | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 0cb3103b22..612f2b54ad 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -14,13 +14,16 @@ pub const Error = error{ParseError} || Allocator.Error; /// Result should be freed with tree.deinit() when there are /// no more references to any of the tokens or nodes. pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree { - // TODO optimization idea: ensureCapacity on the tokens list and - // then appendAssumeCapacity inside the loop. var token_ids = std.ArrayList(Token.Id).init(gpa); defer token_ids.deinit(); var token_locs = std.ArrayList(Token.Loc).init(gpa); defer token_locs.deinit(); + // Empirically, the zig std lib has an 8:1 ratio of source bytes to token count. + const estimated_token_count = source.len / 8; + try token_ids.ensureCapacity(estimated_token_count); + try token_locs.ensureCapacity(estimated_token_count); + var tokenizer = std.zig.Tokenizer.init(source); while (true) { const token = tokenizer.next(); From cda102be020ef9c5c1425553ff611720f496f17e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 25 May 2020 19:29:03 -0400 Subject: [PATCH 47/49] improvements to self-hosted cache hash system * change miscellaneous things to more idiomatic zig style * change the digest length to 24 bytes instead of 48. This is still 70 more bits than UUIDs. For an analysis of probability of collisions, see: https://en.wikipedia.org/wiki/Universally_unique_identifier#Collisions * fix the API having the possibility of mismatched allocators * fix some error paths to behave properly * modify the guarantees about when file contents are loaded for input files * pwrite instead of seek + write * implement isProblematicTimestamp * fix tests with regards to a working isProblematicTimestamp function. this requires sleeping until the current timestamp becomes unproblematic. * introduce std.fs.File.INode, a cross platform type abstraction so that cache hash implementation does not need to reach into std.os. --- lib/std/cache_hash.zig | 392 ++++++++++++++++++++++++----------------- lib/std/fs/file.zig | 6 +- 2 files changed, 237 insertions(+), 161 deletions(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index a383666713..6d965a1fcb 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -1,18 +1,19 @@ -const Blake3 = @import("crypto.zig").Blake3; -const fs = @import("fs.zig"); -const base64 = @import("base64.zig"); -const ArrayList = @import("array_list.zig").ArrayList; -const debug = @import("debug.zig"); -const testing = @import("testing.zig"); -const mem = @import("mem.zig"); -const fmt = @import("fmt.zig"); -const Allocator = mem.Allocator; -const os = @import("os.zig"); -const time = @import("time.zig"); +const std = @import("std.zig"); +const Blake3 = std.crypto.Blake3; +const fs = std.fs; +const base64 = std.base64; +const ArrayList = std.ArrayList; +const assert = std.debug.assert; +const testing = std.testing; +const mem = std.mem; +const fmt = std.fmt; +const Allocator = std.mem.Allocator; const base64_encoder = fs.base64_encoder; const base64_decoder = fs.base64_decoder; -const BIN_DIGEST_LEN = 48; +/// This is 70 more bits than UUIDs. For an analysis of probability of collisions, see: +/// https://en.wikipedia.org/wiki/Universally_unique_identifier#Collisions +const BIN_DIGEST_LEN = 24; const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN); const MANIFEST_FILE_SIZE_MAX = 50 * 1024 * 1024; @@ -22,22 +23,23 @@ pub const File = struct { max_file_size: ?usize, stat: fs.File.Stat, bin_digest: [BIN_DIGEST_LEN]u8, - contents: ?[]const u8 = null, + contents: ?[]const u8, - pub fn deinit(self: *@This(), alloc: *Allocator) void { + pub fn deinit(self: *File, allocator: *Allocator) void { if (self.path) |owned_slice| { - alloc.free(owned_slice); + allocator.free(owned_slice); self.path = null; } if (self.contents) |contents| { - alloc.free(contents); + allocator.free(contents); self.contents = null; } + self.* = undefined; } }; pub const CacheHash = struct { - alloc: *Allocator, + allocator: *Allocator, blake3: Blake3, manifest_dir: fs.Dir, manifest_file: ?fs.File, @@ -45,24 +47,22 @@ pub const CacheHash = struct { files: ArrayList(File), b64_digest: [BASE64_DIGEST_LEN]u8, - pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() { - try fs.cwd().makePath(manifest_dir_path); - const manifest_dir = try fs.cwd().openDir(manifest_dir_path, .{}); - + /// Be sure to call release after successful initialization. + pub fn init(allocator: *Allocator, dir: fs.Dir, manifest_dir_path: []const u8) !CacheHash { return CacheHash{ - .alloc = alloc, + .allocator = allocator, .blake3 = Blake3.init(), - .manifest_dir = manifest_dir, + .manifest_dir = try dir.makeOpenPath(manifest_dir_path, .{}), .manifest_file = null, .manifest_dirty = false, - .files = ArrayList(File).init(alloc), + .files = ArrayList(File).init(allocator), .b64_digest = undefined, }; } /// Record a slice of bytes as an dependency of the process being cached - pub fn addSlice(self: *@This(), val: []const u8) void { - debug.assert(self.manifest_file == null); + pub fn addSlice(self: *CacheHash, val: []const u8) void { + assert(self.manifest_file == null); self.blake3.update(val); self.blake3.update(&[_]u8{0}); @@ -70,8 +70,8 @@ pub const CacheHash = struct { /// Convert the input value into bytes and record it as a dependency of the /// process being cached - pub fn add(self: *@This(), val: var) void { - debug.assert(self.manifest_file == null); + pub fn add(self: *CacheHash, val: var) void { + assert(self.manifest_file == null); const valPtr = switch (@typeInfo(@TypeOf(val))) { .Int => &val, @@ -96,16 +96,22 @@ pub const CacheHash = struct { /// ``` /// var file_contents = cache_hash.files.items[file_index].contents.?; /// ``` - pub fn addFile(self: *@This(), file_path: []const u8, max_file_size: ?usize) !usize { - debug.assert(self.manifest_file == null); + pub fn addFile(self: *CacheHash, file_path: []const u8, max_file_size: ?usize) !usize { + assert(self.manifest_file == null); + + try self.files.ensureCapacity(self.files.items.len + 1); + const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); const idx = self.files.items.len; - var cache_hash_file = try self.files.addOne(); - cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - cache_hash_file.max_file_size = max_file_size; - cache_hash_file.contents = null; + self.files.addOneAssumeCapacity().* = .{ + .path = resolved_path, + .contents = null, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + }; - self.addSlice(cache_hash_file.path.?); + self.addSlice(resolved_path); return idx; } @@ -118,8 +124,8 @@ pub const CacheHash = struct { /// acquire the lock. /// /// The lock on the manifest file is released when `CacheHash.release` is called. - pub fn hit(self: *@This()) !?[BASE64_DIGEST_LEN]u8 { - debug.assert(self.manifest_file == null); + pub fn hit(self: *CacheHash) !?[BASE64_DIGEST_LEN]u8 { + assert(self.manifest_file == null); var bin_digest: [BIN_DIGEST_LEN]u8 = undefined; self.blake3.final(&bin_digest); @@ -129,8 +135,8 @@ pub const CacheHash = struct { self.blake3 = Blake3.init(); self.blake3.update(&bin_digest); - const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest}); - defer self.alloc.free(manifest_file_path); + const manifest_file_path = try fmt.allocPrint(self.allocator, "{}.txt", .{self.b64_digest}); + defer self.allocator.free(manifest_file_path); if (self.files.items.len != 0) { self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ @@ -159,8 +165,8 @@ pub const CacheHash = struct { }; } - const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.alloc, MANIFEST_FILE_SIZE_MAX); - defer self.alloc.free(file_contents); + const file_contents = try self.manifest_file.?.inStream().readAllAlloc(self.allocator, MANIFEST_FILE_SIZE_MAX); + defer self.allocator.free(file_contents); const input_file_count = self.files.items.len; var any_file_changed = false; @@ -169,15 +175,17 @@ pub const CacheHash = struct { while (line_iter.next()) |line| { defer idx += 1; - var cache_hash_file: *File = undefined; - if (idx < input_file_count) { - cache_hash_file = &self.files.items[idx]; - } else { - cache_hash_file = try self.files.addOne(); - cache_hash_file.path = null; - cache_hash_file.max_file_size = null; - cache_hash_file.contents = null; - } + const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: { + const new = try self.files.addOne(); + new.* = .{ + .path = null, + .contents = null, + .max_file_size = null, + .stat = undefined, + .bin_digest = undefined, + }; + break :blk new; + }; var iter = mem.tokenize(line, " "); const inode = iter.next() orelse return error.InvalidFormat; @@ -185,7 +193,7 @@ pub const CacheHash = struct { const digest_str = iter.next() orelse return error.InvalidFormat; const file_path = iter.rest(); - cache_hash_file.stat.inode = fmt.parseInt(os.ino_t, mtime_nsec_str, 10) catch return error.InvalidFormat; + cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, mtime_nsec_str, 10) catch return error.InvalidFormat; cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; @@ -199,7 +207,7 @@ pub const CacheHash = struct { } if (cache_hash_file.path == null) { - cache_hash_file.path = try mem.dupe(self.alloc, u8, file_path); + cache_hash_file.path = try mem.dupe(self.allocator, u8, file_path); } const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch { @@ -216,16 +224,16 @@ pub const CacheHash = struct { cache_hash_file.stat = actual_stat; - if (is_problematic_timestamp(cache_hash_file.stat.mtime)) { + if (isProblematicTimestamp(cache_hash_file.stat.mtime)) { cache_hash_file.stat.mtime = 0; cache_hash_file.stat.inode = 0; } var actual_digest: [BIN_DIGEST_LEN]u8 = undefined; - cache_hash_file.contents = try hash_file(self.alloc, &actual_digest, &this_file, cache_hash_file.max_file_size); + try hashFile(this_file, &actual_digest); if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { - mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest); + cache_hash_file.bin_digest = actual_digest; // keep going until we have the input file digests any_file_changed = true; } @@ -245,9 +253,9 @@ pub const CacheHash = struct { // Remove files not in the initial hash for (self.files.items[input_file_count..]) |*file| { - file.deinit(self.alloc); + file.deinit(self.allocator); } - try self.files.resize(input_file_count); + self.files.shrink(input_file_count); for (self.files.items) |file| { self.blake3.update(&file.bin_digest); @@ -258,10 +266,8 @@ pub const CacheHash = struct { if (idx < input_file_count) { self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { - var cache_hash_file = &self.files.items[idx]; - const contents = self.populate_file_hash(cache_hash_file) catch |err| { - return error.CacheUnavailable; - }; + const ch_file = &self.files.items[idx]; + try self.populateFileHash(ch_file); } return null; } @@ -269,59 +275,97 @@ pub const CacheHash = struct { return self.final(); } - fn populate_file_hash_fetch(self: *@This(), otherAlloc: *mem.Allocator, cache_hash_file: *File) !?[]u8 { - debug.assert(cache_hash_file.path != null); + fn populateFileHash(self: *CacheHash, ch_file: *File) !void { + const file = try fs.cwd().openFile(ch_file.path.?, .{}); + defer file.close(); - const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{}); - defer this_file.close(); + ch_file.stat = try file.stat(); - cache_hash_file.stat = try this_file.stat(); - - if (is_problematic_timestamp(cache_hash_file.stat.mtime)) { - cache_hash_file.stat.mtime = 0; - cache_hash_file.stat.inode = 0; + if (isProblematicTimestamp(ch_file.stat.mtime)) { + ch_file.stat.mtime = 0; + ch_file.stat.inode = 0; } - const contents = try hash_file(otherAlloc, &cache_hash_file.bin_digest, &this_file, cache_hash_file.max_file_size); - self.blake3.update(&cache_hash_file.bin_digest); + if (ch_file.max_file_size) |max_file_size| { + if (ch_file.stat.size > max_file_size) { + return error.FileTooBig; + } - return contents; - } + const contents = try self.allocator.alloc(u8, ch_file.stat.size); + errdefer self.allocator.free(contents); - fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void { - cache_hash_file.contents = try self.populate_file_hash_fetch(self.alloc, cache_hash_file); + // Hash while reading from disk, to keep the contents in the cpu cache while + // doing hashing. + var blake3 = Blake3.init(); + var off: usize = 0; + while (true) { + // give me everything you've got, captain + const bytes_read = try file.read(contents[off..]); + if (bytes_read == 0) break; + blake3.update(contents[off..][0..bytes_read]); + off += bytes_read; + } + blake3.final(&ch_file.bin_digest); + + ch_file.contents = contents; + } else { + try hashFile(file, &ch_file.bin_digest); + } + + self.blake3.update(&ch_file.bin_digest); } /// Add a file as a dependency of process being cached, after the initial hash has been /// calculated. This is useful for processes that don't know the all the files that /// are depended on ahead of time. For example, a source file that can import other files /// will need to be recompiled if the imported file is changed. - /// - /// Returns the contents of the file, allocated with the given allocator. - pub fn addFilePostFetch(self: *@This(), otherAlloc: *mem.Allocator, file_path: []const u8, max_file_size_opt: ?usize) !?[]u8 { - debug.assert(self.manifest_file != null); + pub fn addFilePostFetch(self: *CacheHash, file_path: []const u8, max_file_size: usize) ![]u8 { + assert(self.manifest_file != null); - var cache_hash_file = try self.files.addOne(); - cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path}); - cache_hash_file.max_file_size = max_file_size_opt; - cache_hash_file.contents = null; + const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + errdefer self.allocator.free(resolved_path); - const contents = try self.populate_file_hash_fetch(otherAlloc, cache_hash_file); + const new_ch_file = try self.files.addOne(); + new_ch_file.* = .{ + .path = resolved_path, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + .contents = null, + }; + errdefer self.files.shrink(self.files.items.len - 1); - return contents; + try self.populateFileHash(new_ch_file); + + return new_ch_file.contents.?; } /// Add a file as a dependency of process being cached, after the initial hash has been /// calculated. This is useful for processes that don't know the all the files that /// are depended on ahead of time. For example, a source file that can import other files /// will need to be recompiled if the imported file is changed. - pub fn addFilePost(self: *@This(), file_path: []const u8) !void { - _ = try self.addFilePostFetch(self.alloc, file_path, null); + pub fn addFilePost(self: *CacheHash, file_path: []const u8) !void { + assert(self.manifest_file != null); + + const resolved_path = try fs.path.resolve(self.allocator, &[_][]const u8{file_path}); + errdefer self.allocator.free(resolved_path); + + const new_ch_file = try self.files.addOne(); + new_ch_file.* = .{ + .path = resolved_path, + .max_file_size = null, + .stat = undefined, + .bin_digest = undefined, + .contents = null, + }; + errdefer self.files.shrink(self.files.items.len - 1); + + try self.populateFileHash(new_ch_file); } /// Returns a base64 encoded hash of the inputs. - pub fn final(self: *@This()) [BASE64_DIGEST_LEN]u8 { - debug.assert(self.manifest_file != null); + pub fn final(self: *CacheHash) [BASE64_DIGEST_LEN]u8 { + assert(self.manifest_file != null); // We don't close the manifest file yet, because we want to // keep it locked until the API user is done using it. @@ -338,11 +382,11 @@ pub const CacheHash = struct { return out_digest; } - pub fn write_manifest(self: *@This()) !void { - debug.assert(self.manifest_file != null); + pub fn writeManifest(self: *CacheHash) !void { + assert(self.manifest_file != null); var encoded_digest: [BASE64_DIGEST_LEN]u8 = undefined; - var contents = ArrayList(u8).init(self.alloc); + var contents = ArrayList(u8).init(self.allocator); var outStream = contents.outStream(); defer contents.deinit(); @@ -351,68 +395,78 @@ pub const CacheHash = struct { try outStream.print("{} {} {} {}\n", .{ file.stat.inode, file.stat.mtime, encoded_digest[0..], file.path }); } - try self.manifest_file.?.seekTo(0); - try self.manifest_file.?.writeAll(contents.items); + try self.manifest_file.?.pwriteAll(contents.items, 0); + self.manifest_dirty = false; } /// Releases the manifest file and frees any memory the CacheHash was using. /// `CacheHash.hit` must be called first. /// /// Will also attempt to write to the manifest file if the manifest is dirty. - /// Writing to the manifest file is the only way that this file can return an - /// error. - pub fn release(self: *@This()) !void { + /// Writing to the manifest file can fail, but this function ignores those errors. + /// To detect failures from writing the manifest, one may explicitly call + /// `writeManifest` before `release`. + pub fn release(self: *CacheHash) void { if (self.manifest_file) |file| { if (self.manifest_dirty) { - try self.write_manifest(); + // To handle these errors, API users should call + // writeManifest before release(). + self.writeManifest() catch {}; } file.close(); } for (self.files.items) |*file| { - file.deinit(self.alloc); + file.deinit(self.allocator); } self.files.deinit(); self.manifest_dir.close(); } }; -/// Hash the file, and return the contents as an array -fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File, max_file_size_opt: ?usize) !?[]u8 { +fn hashFile(file: fs.File, bin_digest: []u8) !void { var blake3 = Blake3.init(); - var in_stream = handle.inStream(); + var buf: [1024]u8 = undefined; - if (max_file_size_opt) |max_file_size| { - const contents = try in_stream.readAllAlloc(alloc, max_file_size); - - blake3.update(contents); - - blake3.final(bin_digest); - - return contents; - } else { - var buf: [1024]u8 = undefined; - - while (true) { - const bytes_read = try in_stream.read(buf[0..]); - if (bytes_read == 0) break; - blake3.update(buf[0..bytes_read]); - } - - blake3.final(bin_digest); - return null; + while (true) { + const bytes_read = try file.read(&buf); + if (bytes_read == 0) break; + blake3.update(buf[0..bytes_read]); } + + blake3.final(bin_digest); } /// If the wall clock time, rounded to the same precision as the /// mtime, is equal to the mtime, then we cannot rely on this mtime /// yet. We will instead save an mtime value that indicates the hash /// must be unconditionally computed. -fn is_problematic_timestamp(file_mtime_ns: i64) bool { - const now_ms = time.milliTimestamp(); - const file_mtime_ms = @divFloor(file_mtime_ns, time.millisecond); - return now_ms == file_mtime_ms; +/// This function recognizes the precision of mtime by looking at trailing +/// zero bits of the seconds and nanoseconds. +fn isProblematicTimestamp(fs_clock: i128) bool { + const wall_clock = std.time.nanoTimestamp(); + + // We have to break the nanoseconds into seconds and remainder nanoseconds + // to detect precision of seconds, because looking at the zero bits in base + // 2 would not detect precision of the seconds value. + const fs_sec = @intCast(i64, @divFloor(fs_clock, std.time.ns_per_s)); + const fs_nsec = @intCast(i64, @mod(fs_clock, std.time.ns_per_s)); + var wall_sec = @intCast(i64, @divFloor(wall_clock, std.time.ns_per_s)); + var wall_nsec = @intCast(i64, @mod(wall_clock, std.time.ns_per_s)); + + // First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock. + if (fs_nsec == 0) { + wall_nsec = 0; + if (fs_sec == 0) { + wall_sec = 0; + } else { + wall_sec &= @as(i64, -1) << @intCast(u6, @ctz(i64, fs_sec)); + } + } else { + wall_nsec &= @as(i64, -1) << @intCast(u6, @ctz(i64, fs_nsec)); + } + return wall_nsec == fs_nsec and wall_sec == fs_sec; } test "cache file and then recall it" { @@ -423,12 +477,16 @@ test "cache file and then recall it" { try cwd.writeFile(temp_file, "Hello, world!\n"); + while (isProblematicTimestamp(std.time.nanoTimestamp())) { + std.time.sleep(1); + } + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add(true); ch.add(@as(u16, 1234)); @@ -436,13 +494,13 @@ test "cache file and then recall it" { _ = try ch.addFile(temp_file, null); // There should be nothing in the cache - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add(true); ch.add(@as(u16, 1234)); @@ -460,13 +518,15 @@ test "cache file and then recall it" { } test "give problematic timestamp" { - const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond); - testing.expect(is_problematic_timestamp(now_ns)); + var fs_clock = std.time.nanoTimestamp(); + // to make it problematic, we make it only accurate to the second + fs_clock = @divTrunc(fs_clock, std.time.ns_per_s); + fs_clock *= std.time.ns_per_s; + testing.expect(isProblematicTimestamp(fs_clock)); } test "give nonproblematic timestamp" { - const now_ns = @intCast(i64, time.milliTimestamp() * time.millisecond) - 1000; - testing.expect(!is_problematic_timestamp(now_ns)); + testing.expect(!isProblematicTimestamp(std.time.nanoTimestamp() - std.time.ns_per_s)); } test "check that changing a file makes cache fail" { @@ -479,18 +539,22 @@ test "check that changing a file makes cache fail" { try cwd.writeFile(temp_file, original_temp_file_contents); + while (isProblematicTimestamp(std.time.nanoTimestamp())) { + std.time.sleep(1); + } + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); const temp_file_idx = try ch.addFile(temp_file, 100); // There should be nothing in the cache - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); @@ -499,17 +563,22 @@ test "check that changing a file makes cache fail" { try cwd.writeFile(temp_file, updated_temp_file_contents); + while (isProblematicTimestamp(std.time.nanoTimestamp())) { + std.time.sleep(1); + } + { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); const temp_file_idx = try ch.addFile(temp_file, 100); // A file that we depend on has been updated, so the cache should not contain an entry for it - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); - testing.expect(mem.eql(u8, updated_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); + // The cache system does not keep the contents of re-hashed input files. + testing.expect(ch.files.items[temp_file_idx].contents == null); digest2 = ch.final(); } @@ -529,19 +598,19 @@ test "no file inputs" { var digest2: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); // There should be nothing in the cache - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); @@ -561,55 +630,62 @@ test "CacheHashes with files added after initial hash work" { try cwd.writeFile(temp_file1, "Hello, world!\n"); try cwd.writeFile(temp_file2, "Hello world the second!\n"); + while (isProblematicTimestamp(std.time.nanoTimestamp())) { + std.time.sleep(1); + } + var digest1: [BASE64_DIGEST_LEN]u8 = undefined; var digest2: [BASE64_DIGEST_LEN]u8 = undefined; var digest3: [BASE64_DIGEST_LEN]u8 = undefined; { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); _ = try ch.addFile(temp_file1, null); // There should be nothing in the cache - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); _ = try ch.addFilePost(temp_file2); digest1 = ch.final(); } { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); _ = try ch.addFile(temp_file1, null); - // A file that we depend on has been updated, so the cache should not contain an entry for it digest2 = (try ch.hit()).?; } + testing.expect(mem.eql(u8, &digest1, &digest2)); // Modify the file added after initial hash try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); + while (isProblematicTimestamp(std.time.nanoTimestamp())) { + std.time.sleep(1); + } + { - var ch = try CacheHash.init(testing.allocator, temp_manifest_dir); - defer ch.release() catch unreachable; + var ch = try CacheHash.init(testing.allocator, cwd, temp_manifest_dir); + defer ch.release(); ch.add("1234"); _ = try ch.addFile(temp_file1, null); // A file that we depend on has been updated, so the cache should not contain an entry for it - testing.expectEqual(@as(?[64]u8, null), try ch.hit()); + testing.expectEqual(@as(?[32]u8, null), try ch.hit()); _ = try ch.addFilePost(temp_file2); digest3 = ch.final(); } - testing.expect(mem.eql(u8, digest1[0..], digest2[0..])); - testing.expect(!mem.eql(u8, digest1[0..], digest3[0..])); + testing.expect(!mem.eql(u8, &digest1, &digest3)); try cwd.deleteTree(temp_manifest_dir); try cwd.deleteFile(temp_file1); diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index a9ee996e4b..ce9546a9d9 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -27,6 +27,7 @@ pub const File = struct { intended_io_mode: io.ModeOverride = io.default_mode, pub const Mode = os.mode_t; + pub const INode = os.ino_t; pub const default_mode = switch (builtin.os.tag) { .windows => 0, @@ -215,15 +216,14 @@ pub const File = struct { pub const Stat = struct { /// A number that the system uses to point to the file metadata. This number is not guaranteed to be - /// unique across time, as some file systems may reuse an inode after it's file has been deleted. + /// unique across time, as some file systems may reuse an inode after its file has been deleted. /// Some systems may change the inode of a file over time. /// /// On Linux, the inode _is_ structure that stores the metadata, and the inode _number_ is what /// you see here: the index number of the inode. /// /// The FileIndex on Windows is similar. It is a number for a file that is unique to each filesystem. - inode: os.ino_t, - + inode: INode, size: u64, mode: Mode, From a83aab5209a040dc8718c71de87a1fa72967ba9d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 25 May 2020 19:46:28 -0400 Subject: [PATCH 48/49] fix std lib tests for WASI --- lib/std/cache_hash.zig | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/std/cache_hash.zig b/lib/std/cache_hash.zig index 6d965a1fcb..d160c4ebb2 100644 --- a/lib/std/cache_hash.zig +++ b/lib/std/cache_hash.zig @@ -291,7 +291,7 @@ pub const CacheHash = struct { return error.FileTooBig; } - const contents = try self.allocator.alloc(u8, ch_file.stat.size); + const contents = try self.allocator.alloc(u8, @intCast(usize, ch_file.stat.size)); errdefer self.allocator.free(contents); // Hash while reading from disk, to keep the contents in the cpu cache while @@ -470,6 +470,10 @@ fn isProblematicTimestamp(fs_clock: i128) bool { } test "cache file and then recall it" { + if (std.Target.current.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } const cwd = fs.cwd(); const temp_file = "test.txt"; @@ -530,6 +534,10 @@ test "give nonproblematic timestamp" { } test "check that changing a file makes cache fail" { + if (std.Target.current.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } const cwd = fs.cwd(); const temp_file = "cache_hash_change_file_test.txt"; @@ -590,6 +598,10 @@ test "check that changing a file makes cache fail" { } test "no file inputs" { + if (std.Target.current.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } const cwd = fs.cwd(); const temp_manifest_dir = "no_file_inputs_manifest_dir"; defer cwd.deleteTree(temp_manifest_dir) catch unreachable; @@ -621,6 +633,10 @@ test "no file inputs" { } test "CacheHashes with files added after initial hash work" { + if (std.Target.current.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } const cwd = fs.cwd(); const temp_file1 = "cache_hash_post_file_test1.txt"; From 7c8d0cc67876ce2059a8a7b2bc35f4948a2e42c8 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 25 May 2020 19:59:39 -0400 Subject: [PATCH 49/49] fix pwrite on 32-bit linux --- lib/std/os/linux.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 6653293e59..f00238141a 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -417,7 +417,7 @@ pub fn ftruncate(fd: i32, length: u64) usize { } } -pub fn pwrite(fd: i32, buf: [*]const u8, count: usize, offset: usize) usize { +pub fn pwrite(fd: i32, buf: [*]const u8, count: usize, offset: u64) usize { if (@hasField(SYS, "pwrite64")) { if (require_aligned_register_pair) { return syscall6(