From 72ee042ab0c6dc823f839bca6d1511a144f62b49 Mon Sep 17 00:00:00 2001 From: Travis Martin Date: Sun, 10 Oct 2021 21:57:26 -0700 Subject: [PATCH 1/4] Cache: fix two issues with isProblematicTimestamp 1. It was looking for trailing zero bits when it should be looking for trailing decimal zeros. 2. Clock timestamps had more precision than the actual file timestamps The fix is to grab a timestamp from a 'just now changed' temp file. This timestamp is "problematic". Any file timestamp greater than or equal to this timestamp is considered problematic. File timestamps **prior** to this **can** be trusted. Downside is that it causes a disk I/O to write to and then read the timestamp from this file ~1ms on my system. This is partially mitigated by keeping track of the most recent problematic timestamp, and only checking for a new problematic timestamp when checking a timestamp that is equal to or larger than the last problematic one. This fixes #6082. --- src/Cache.zig | 102 +++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/src/Cache.zig b/src/Cache.zig index 1362f6d816..783b7b9ffa 100644 --- a/src/Cache.zig +++ b/src/Cache.zig @@ -187,11 +187,14 @@ pub const Manifest = struct { /// of the files listed in the manifest. failed_file_index: ?usize = null, + /// most recent problematic timestamp + recent_problematic_timestamp: i128 = 0, + /// Add a file as a dependency of process being cached. When `hit` is /// called, the file's contents will be checked to ensure that it matches /// the contents from previous times. /// - /// Max file size will be used to determine the amount of space to the file contents + /// Max file size will be used to determine the amount of space the file contents /// are allowed to take up in memory. If max_file_size is null, then the contents /// will not be loaded into memory. /// @@ -414,7 +417,8 @@ pub const Manifest = struct { cache_hash_file.stat = actual_stat; - if (isProblematicTimestamp(cache_hash_file.stat.mtime)) { + if (try self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed cache_hash_file.stat.mtime = 0; cache_hash_file.stat.inode = 0; } @@ -485,7 +489,8 @@ pub const Manifest = struct { ch_file.stat = try file.stat(); - if (isProblematicTimestamp(ch_file.stat.mtime)) { + if (try self.isProblematicTimestamp(ch_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed ch_file.stat.mtime = 0; ch_file.stat.inode = 0; } @@ -520,7 +525,7 @@ pub const Manifest = struct { } /// Add a file as a dependency of process being cached, after the initial hash has been - /// calculated. This is useful for processes that don't know the all the files that + /// calculated. This is useful for processes that don't know all the files that /// are depended on ahead of time. For example, a source file that can import other files /// will need to be recompiled if the imported file is changed. pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 { @@ -679,6 +684,26 @@ pub const Manifest = struct { self.have_exclusive_lock = true; } + // Create/Write a file, close it, then grab its stat.mtime timestamp. + fn isProblematicTimestamp(self: *Manifest, file_time: i128) !bool { + + // PERF: Check if the file_time is prior to the most recent problematic timestamp + // and break out early if so (avoids an I/O to update the recent_problematic_timestamp) + if (file_time < self.recent_problematic_timestamp) + return false; + + var timestamp_file = try self.cache.manifest_dir.createFile("filetimestamp.tmp", .{ + .read = true, + .truncate = false, + }); + defer timestamp_file.close(); + try timestamp_file.setEndPos(0); + + self.recent_problematic_timestamp = (try timestamp_file.stat()).mtime; + + return (file_time >= self.recent_problematic_timestamp); + } + /// Obtain only the data needed to maintain a lock on the manifest file. /// The `Manifest` remains safe to deinit. /// Don't forget to call `writeManifest` before this! @@ -741,35 +766,16 @@ fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void { hasher.final(bin_digest); } -/// If the wall clock time, rounded to the same precision as the -/// mtime, is equal to the mtime, then we cannot rely on this mtime -/// yet. We will instead save an mtime value that indicates the hash -/// must be unconditionally computed. -/// This function recognizes the precision of mtime by looking at trailing -/// zero bits of the seconds and nanoseconds. -fn isProblematicTimestamp(fs_clock: i128) bool { - const wall_clock = std.time.nanoTimestamp(); +// Create/Write a file, close it, then grab its stat.mtime timestamp. +fn testGetCurrentFileTimestamp() !i128 { + var timestamp_file = try fs.cwd().createFile("zig-cache/filetimestamp.tmp", .{ + .read = true, + .truncate = false, + }); + defer timestamp_file.close(); + try timestamp_file.setEndPos(0); - // We have to break the nanoseconds into seconds and remainder nanoseconds - // to detect precision of seconds, because looking at the zero bits in base - // 2 would not detect precision of the seconds value. - const fs_sec = @intCast(i64, @divFloor(fs_clock, std.time.ns_per_s)); - const fs_nsec = @intCast(i64, @mod(fs_clock, std.time.ns_per_s)); - var wall_sec = @intCast(i64, @divFloor(wall_clock, std.time.ns_per_s)); - var wall_nsec = @intCast(i64, @mod(wall_clock, std.time.ns_per_s)); - - // First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock. - if (fs_nsec == 0) { - wall_nsec = 0; - if (fs_sec == 0) { - wall_sec = 0; - } else { - wall_sec &= @as(i64, -1) << @intCast(u6, @ctz(i64, fs_sec)); - } - } else { - wall_nsec &= @as(i64, -1) << @intCast(u6, @ctz(i64, fs_nsec)); - } - return wall_nsec == fs_nsec and wall_sec == fs_sec; + return (try timestamp_file.stat()).mtime; } test "cache file and then recall it" { @@ -783,10 +789,11 @@ test "cache file and then recall it" { const temp_file = "test.txt"; const temp_manifest_dir = "temp_manifest_dir"; - const ts = std.time.nanoTimestamp(); try cwd.writeFile(temp_file, "Hello, world!\n"); - while (isProblematicTimestamp(ts)) { + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { std.time.sleep(1); } @@ -838,18 +845,6 @@ test "cache file and then recall it" { try cwd.deleteFile(temp_file); } -test "give problematic timestamp" { - var fs_clock = std.time.nanoTimestamp(); - // to make it problematic, we make it only accurate to the second - fs_clock = @divTrunc(fs_clock, std.time.ns_per_s); - fs_clock *= std.time.ns_per_s; - try testing.expect(isProblematicTimestamp(fs_clock)); -} - -test "give nonproblematic timestamp" { - try testing.expect(!isProblematicTimestamp(std.time.nanoTimestamp() - std.time.ns_per_s)); -} - test "check that changing a file makes cache fail" { if (builtin.os.tag == .wasi) { // https://github.com/ziglang/zig/issues/5437 @@ -865,10 +860,11 @@ test "check that changing a file makes cache fail" { try cwd.deleteTree(temp_manifest_dir); try cwd.deleteTree(temp_file); - const ts = std.time.nanoTimestamp(); try cwd.writeFile(temp_file, original_temp_file_contents); - while (isProblematicTimestamp(ts)) { + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { std.time.sleep(1); } @@ -982,11 +978,12 @@ test "Manifest with files added after initial hash work" { const temp_file2 = "cache_hash_post_file_test2.txt"; const temp_manifest_dir = "cache_hash_post_file_manifest_dir"; - const ts1 = std.time.nanoTimestamp(); try cwd.writeFile(temp_file1, "Hello, world!\n"); try cwd.writeFile(temp_file2, "Hello world the second!\n"); - while (isProblematicTimestamp(ts1)) { + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { std.time.sleep(1); } @@ -1031,10 +1028,11 @@ test "Manifest with files added after initial hash work" { try testing.expect(mem.eql(u8, &digest1, &digest2)); // Modify the file added after initial hash - const ts2 = std.time.nanoTimestamp(); try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); - while (isProblematicTimestamp(ts2)) { + // Wait for file timestamps to tick + const initial_time2 = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time2) { std.time.sleep(1); } From 4da83feccb4b0e59afdcce9796b08cc4fc8346ae Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 9 Dec 2021 18:55:20 -0700 Subject: [PATCH 2/4] Cache: improvements to previous commit * put `recent_problematic_timestamp` onto `Cache` so that it can be shared by multiple Manifest instances. * make `isProblematicTimestamp` return true on any filesystem error. * save 1 syscall by using truncate=true in createFile instead of calling `setEndPos`. --- src/Cache.zig | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/src/Cache.zig b/src/Cache.zig index 783b7b9ffa..857ddc78e3 100644 --- a/src/Cache.zig +++ b/src/Cache.zig @@ -1,6 +1,7 @@ gpa: Allocator, manifest_dir: fs.Dir, hash: HashHelper = .{}, +recent_problematic_timestamp: i128 = 0, const Cache = @This(); const std = @import("std"); @@ -16,7 +17,7 @@ const Compilation = @import("Compilation.zig"); const log = std.log.scoped(.cache); /// Be sure to call `Manifest.deinit` after successful initialization. -pub fn obtain(cache: *const Cache) Manifest { +pub fn obtain(cache: *Cache) Manifest { return Manifest{ .cache = cache, .hash = cache.hash, @@ -170,7 +171,7 @@ pub const Lock = struct { /// This is not a general-purpose cache. /// It is designed to be fast and simple, not to withstand attacks using specially-crafted input. pub const Manifest = struct { - cache: *const Cache, + cache: *Cache, /// Current state for incremental hashing. hash: HashHelper, manifest_file: ?fs.File, @@ -187,9 +188,6 @@ pub const Manifest = struct { /// of the files listed in the manifest. failed_file_index: ?usize = null, - /// most recent problematic timestamp - recent_problematic_timestamp: i128 = 0, - /// Add a file as a dependency of process being cached. When `hit` is /// called, the file's contents will be checked to ensure that it matches /// the contents from previous times. @@ -417,7 +415,7 @@ pub const Manifest = struct { cache_hash_file.stat = actual_stat; - if (try self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { + if (self.cache.isProblematicTimestamp(cache_hash_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed cache_hash_file.stat.mtime = 0; cache_hash_file.stat.inode = 0; @@ -489,7 +487,7 @@ pub const Manifest = struct { ch_file.stat = try file.stat(); - if (try self.isProblematicTimestamp(ch_file.stat.mtime)) { + if (self.cache.isProblematicTimestamp(ch_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed ch_file.stat.mtime = 0; ch_file.stat.inode = 0; @@ -684,26 +682,6 @@ pub const Manifest = struct { self.have_exclusive_lock = true; } - // Create/Write a file, close it, then grab its stat.mtime timestamp. - fn isProblematicTimestamp(self: *Manifest, file_time: i128) !bool { - - // PERF: Check if the file_time is prior to the most recent problematic timestamp - // and break out early if so (avoids an I/O to update the recent_problematic_timestamp) - if (file_time < self.recent_problematic_timestamp) - return false; - - var timestamp_file = try self.cache.manifest_dir.createFile("filetimestamp.tmp", .{ - .read = true, - .truncate = false, - }); - defer timestamp_file.close(); - try timestamp_file.setEndPos(0); - - self.recent_problematic_timestamp = (try timestamp_file.stat()).mtime; - - return (file_time >= self.recent_problematic_timestamp); - } - /// Obtain only the data needed to maintain a lock on the manifest file. /// The `Manifest` remains safe to deinit. /// Don't forget to call `writeManifest` before this! @@ -766,16 +744,34 @@ fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void { hasher.final(bin_digest); } +/// Create/Write a file, grab its stat.mtime timestamp, then close it. +/// If any filesystem errors occur, this function returns `true`. +fn isProblematicTimestamp(cache: *Cache, file_time: i128) bool { + // If the file_time is prior to the most recent problematic timestamp + // then we don't need to access the filesystem. + if (file_time < cache.recent_problematic_timestamp) + return false; + + var file = cache.manifest_dir.createFile("timestamp", .{ + .read = true, + .truncate = true, + }) catch return true; + defer file.close(); + + cache.recent_problematic_timestamp = (file.stat() catch return true).mtime; + + return file_time >= cache.recent_problematic_timestamp; +} + // Create/Write a file, close it, then grab its stat.mtime timestamp. fn testGetCurrentFileTimestamp() !i128 { - var timestamp_file = try fs.cwd().createFile("zig-cache/filetimestamp.tmp", .{ + var file = try fs.cwd().createFile("test-filetimestamp.tmp", .{ .read = true, - .truncate = false, + .truncate = true, }); - defer timestamp_file.close(); - try timestamp_file.setEndPos(0); + defer file.close(); - return (try timestamp_file.stat()).mtime; + return (try file.stat()).mtime; } test "cache file and then recall it" { From fdbb329d1009b4c7b23552e65c0344e3ce52b459 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 9 Dec 2021 21:14:39 -0700 Subject: [PATCH 3/4] Cache: fix data race with is_problematic_timestamp Previously `recent_problematic_timestamp` was unprotected and accessed potentially with multiple worker threads simultaneously. This commit protects it with atomics and also introduces a flag to prevent multiple timestamp checks from within the same call to hit(). Unfortunately the compiler-rt function __sync_val_compare_and_swap_16 is not yet implemented, so I will have to take a different strategy in a follow-up commit. --- src/Cache.zig | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/Cache.zig b/src/Cache.zig index 857ddc78e3..03b44fc535 100644 --- a/src/Cache.zig +++ b/src/Cache.zig @@ -2,6 +2,7 @@ gpa: Allocator, manifest_dir: fs.Dir, hash: HashHelper = .{}, recent_problematic_timestamp: i128 = 0, +want_refresh_timestamp: bool = true, const Cache = @This(); const std = @import("std"); @@ -346,6 +347,12 @@ pub const Manifest = struct { } } + // Indicate that we want isProblematicTimestamp to perform a filesystem write in + // order to obtain a problematic timestamp for the next call. Calls after that + // in this same hit() function call will then use the same timestamp, to avoid + // writing multiple times to the filesystem. + @atomicStore(bool, &self.cache.want_refresh_timestamp, true, .Monotonic); + const file_contents = try self.manifest_file.?.reader().readAllAlloc(self.cache.gpa, manifest_file_size_max); defer self.cache.gpa.free(file_contents); @@ -749,18 +756,29 @@ fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void { fn isProblematicTimestamp(cache: *Cache, file_time: i128) bool { // If the file_time is prior to the most recent problematic timestamp // then we don't need to access the filesystem. - if (file_time < cache.recent_problematic_timestamp) + var ts = @atomicLoad(i128, &cache.recent_problematic_timestamp, .Monotonic); + if (file_time < ts) return false; - var file = cache.manifest_dir.createFile("timestamp", .{ - .read = true, - .truncate = true, - }) catch return true; - defer file.close(); + if (@atomicRmw(bool, &cache.want_refresh_timestamp, .Xchg, false, .Monotonic)) { + var file = cache.manifest_dir.createFile("timestamp", .{ + .read = true, + .truncate = true, + }) catch return true; + defer file.close(); - cache.recent_problematic_timestamp = (file.stat() catch return true).mtime; + const new_ts = (file.stat() catch return true).mtime; + ts = if (@cmpxchgWeak( + i128, + &cache.recent_problematic_timestamp, + ts, + new_ts, + .Monotonic, + .Monotonic, + )) |race_ts| race_ts else new_ts; + } - return file_time >= cache.recent_problematic_timestamp; + return file_time >= ts; } // Create/Write a file, close it, then grab its stat.mtime timestamp. From 3e618f8432533eaa1cf6dcb39bdcb1b2e5bc1e47 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 9 Dec 2021 22:07:28 -0700 Subject: [PATCH 4/4] Cache: use mutex to protect recent_problematic_timestamp The previous commit tried to use atomics but not many CPUs support 128-bit atomics. So we use a mutex. In order to avoid contention, we also store `recent_problematic_timestamp` locally on the `Manifest` which is only ever accessed from a single thread at a time, and only consult the global one if the local one is problematic. This commit was tested by running `zig build test-behavior` in two separate terminals at the same time. --- src/Cache.zig | 84 ++++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/src/Cache.zig b/src/Cache.zig index 03b44fc535..94ad947f69 100644 --- a/src/Cache.zig +++ b/src/Cache.zig @@ -1,8 +1,9 @@ gpa: Allocator, manifest_dir: fs.Dir, hash: HashHelper = .{}, +/// This value is accessed from multiple threads, protected by mutex. recent_problematic_timestamp: i128 = 0, -want_refresh_timestamp: bool = true, +mutex: std.Thread.Mutex = .{}, const Cache = @This(); const std = @import("std"); @@ -183,11 +184,18 @@ pub const Manifest = struct { /// the same cache directory at the same time. want_shared_lock: bool = true, have_exclusive_lock: bool = false, + // Indicate that we want isProblematicTimestamp to perform a filesystem write in + // order to obtain a problematic timestamp for the next call. Calls after that + // will then use the same timestamp, to avoid unnecessary filesystem writes. + want_refresh_timestamp: bool = true, files: std.ArrayListUnmanaged(File) = .{}, hex_digest: [hex_digest_len]u8, /// Populated when hit() returns an error because of one /// of the files listed in the manifest. failed_file_index: ?usize = null, + /// Keeps track of the last time we performed a file system write to observe + /// what time the file system thinks it is, according to its own granularity. + recent_problematic_timestamp: i128 = 0, /// Add a file as a dependency of process being cached. When `hit` is /// called, the file's contents will be checked to ensure that it matches @@ -347,11 +355,7 @@ pub const Manifest = struct { } } - // Indicate that we want isProblematicTimestamp to perform a filesystem write in - // order to obtain a problematic timestamp for the next call. Calls after that - // in this same hit() function call will then use the same timestamp, to avoid - // writing multiple times to the filesystem. - @atomicStore(bool, &self.cache.want_refresh_timestamp, true, .Monotonic); + self.want_refresh_timestamp = true; const file_contents = try self.manifest_file.?.reader().readAllAlloc(self.cache.gpa, manifest_file_size_max); defer self.cache.gpa.free(file_contents); @@ -422,7 +426,7 @@ pub const Manifest = struct { cache_hash_file.stat = actual_stat; - if (self.cache.isProblematicTimestamp(cache_hash_file.stat.mtime)) { + if (self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed cache_hash_file.stat.mtime = 0; cache_hash_file.stat.inode = 0; @@ -487,6 +491,40 @@ pub const Manifest = struct { } } + fn isProblematicTimestamp(man: *Manifest, file_time: i128) bool { + // If the file_time is prior to the most recent problematic timestamp + // then we don't need to access the filesystem. + if (file_time < man.recent_problematic_timestamp) + return false; + + // Next we will check the globally shared Cache timestamp, which is accessed + // from multiple threads. + man.cache.mutex.lock(); + defer man.cache.mutex.unlock(); + + // Save the global one to our local one to avoid locking next time. + man.recent_problematic_timestamp = man.cache.recent_problematic_timestamp; + if (file_time < man.recent_problematic_timestamp) + return false; + + // This flag prevents multiple filesystem writes for the same hit() call. + if (man.want_refresh_timestamp) { + man.want_refresh_timestamp = false; + + var file = man.cache.manifest_dir.createFile("timestamp", .{ + .read = true, + .truncate = true, + }) catch return true; + defer file.close(); + + // Save locally and also save globally (we still hold the global lock). + man.recent_problematic_timestamp = (file.stat() catch return true).mtime; + man.cache.recent_problematic_timestamp = man.recent_problematic_timestamp; + } + + return file_time >= man.recent_problematic_timestamp; + } + fn populateFileHash(self: *Manifest, ch_file: *File) !void { log.debug("populateFileHash {s}", .{ch_file.path.?}); const file = try fs.cwd().openFile(ch_file.path.?, .{}); @@ -494,7 +532,7 @@ pub const Manifest = struct { ch_file.stat = try file.stat(); - if (self.cache.isProblematicTimestamp(ch_file.stat.mtime)) { + if (self.isProblematicTimestamp(ch_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed ch_file.stat.mtime = 0; ch_file.stat.inode = 0; @@ -751,36 +789,6 @@ fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void { hasher.final(bin_digest); } -/// Create/Write a file, grab its stat.mtime timestamp, then close it. -/// If any filesystem errors occur, this function returns `true`. -fn isProblematicTimestamp(cache: *Cache, file_time: i128) bool { - // If the file_time is prior to the most recent problematic timestamp - // then we don't need to access the filesystem. - var ts = @atomicLoad(i128, &cache.recent_problematic_timestamp, .Monotonic); - if (file_time < ts) - return false; - - if (@atomicRmw(bool, &cache.want_refresh_timestamp, .Xchg, false, .Monotonic)) { - var file = cache.manifest_dir.createFile("timestamp", .{ - .read = true, - .truncate = true, - }) catch return true; - defer file.close(); - - const new_ts = (file.stat() catch return true).mtime; - ts = if (@cmpxchgWeak( - i128, - &cache.recent_problematic_timestamp, - ts, - new_ts, - .Monotonic, - .Monotonic, - )) |race_ts| race_ts else new_ts; - } - - return file_time >= ts; -} - // Create/Write a file, close it, then grab its stat.mtime timestamp. fn testGetCurrentFileTimestamp() !i128 { var file = try fs.cwd().createFile("test-filetimestamp.tmp", .{