Merge pull request #22202 from ziglang/Cache.hit

std.Build.Cache.hit: more discipline in error handling
Andrew Kelley, 2024-12-11 14:57:11 -05:00 (committed by GitHub)
commit 3670910f20
6 changed files with 201 additions and 49 deletions
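
A note on the shape of the change before the diff: Manifest.hit() now returns the small HitError set and records details in a new diagnostic field, replacing the old failed_file_index plus open-ended anyerror. The sketch below shows the caller pattern this enables; it mirrors the handling added to Step.zig and Compilation.zig in this commit, but the function name and log wording here are illustrative, not part of the change.

    const std = @import("std");
    const Cache = std.Build.Cache;

    /// Sketch: report a failed cache check using the new `diagnostic` field.
    fn checkCacheOrLog(man: *Cache.Manifest) !bool {
        return man.hit() catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            error.InvalidFormat => {
                std.log.err("cache manifest file has invalid format", .{});
                return err;
            },
            error.CacheCheckFailed => {
                switch (man.diagnostic) {
                    .none => unreachable, // hit() records a diagnostic before failing
                    .manifest_create, .manifest_read, .manifest_lock => |e| std.log.err(
                        "unable to check cache: {s}: {s}",
                        .{ @tagName(man.diagnostic), @errorName(e) },
                    ),
                    .file_open, .file_stat, .file_read, .file_hash => |op| {
                        const pp = man.files.keys()[op.file_index].prefixed_path;
                        std.log.err("unable to check cache input '{s}': {s}: {s}", .{
                            pp.sub_path, @tagName(man.diagnostic), @errorName(op.err),
                        });
                    },
                }
                return err;
            },
        };
    }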

View File

@ -40,7 +40,7 @@ pub fn addPrefix(cache: *Cache, directory: Directory) void {
/// Be sure to call `Manifest.deinit` after successful initialization.
pub fn obtain(cache: *Cache) Manifest {
- return Manifest{
+ return .{
.cache = cache,
.hash = cache.hash,
.manifest_file = null,
@ -99,9 +99,9 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
}
fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
- const relative = try std.fs.path.relative(allocator, prefix, path);
+ const relative = try fs.path.relative(allocator, prefix, path);
errdefer allocator.free(relative);
- var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch {
+ var component_iterator = fs.path.NativeComponentIterator.init(relative) catch {
return error.NotASubPath;
};
if (component_iterator.root() != null) {
@ -327,13 +327,27 @@ pub const Manifest = struct {
want_refresh_timestamp: bool = true,
files: Files = .{},
hex_digest: HexDigest,
- /// Populated when hit() returns an error because of one
- /// of the files listed in the manifest.
- failed_file_index: ?usize = null,
+ diagnostic: Diagnostic = .none,
/// Keeps track of the last time we performed a file system write to observe
/// what time the file system thinks it is, according to its own granularity.
recent_problematic_timestamp: i128 = 0,
+ pub const Diagnostic = union(enum) {
+ none,
+ manifest_create: fs.File.OpenError,
+ manifest_read: fs.File.ReadError,
+ manifest_lock: fs.File.LockError,
+ file_open: FileOp,
+ file_stat: FileOp,
+ file_read: FileOp,
+ file_hash: FileOp,
+ pub const FileOp = struct {
+ file_index: usize,
+ err: anyerror,
+ };
+ };
pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);
pub const FilesContext = struct {
@ -452,6 +466,15 @@ pub const Manifest = struct {
return self.addDepFileMaybePost(dir, dep_file_basename);
}
+ pub const HitError = error{
+ /// Unable to check the cache for a reason that has been recorded into
+ /// the `diagnostic` field.
+ CacheCheckFailed,
+ /// A cache manifest file exists however it could not be parsed.
+ InvalidFormat,
+ OutOfMemory,
+ };
/// Check the cache to see if the input exists in it. If it exists, returns `true`.
/// A hex encoding of its hash is available by calling `final`.
///
@ -464,11 +487,11 @@ pub const Manifest = struct {
/// The lock on the manifest file is released when `deinit` is called. As another
/// option, one may call `toOwnedLock` to obtain a smaller object which can represent
/// the lock. `deinit` is safe to call whether or not `toOwnedLock` has been called.
- pub fn hit(self: *Manifest) !bool {
+ pub fn hit(self: *Manifest) HitError!bool {
const gpa = self.cache.gpa;
assert(self.manifest_file == null);
- self.failed_file_index = null;
+ self.diagnostic = .none;
const ext = ".txt";
var manifest_file_path: [hex_digest_len + ext.len]u8 = undefined;
@ -496,17 +519,56 @@ pub const Manifest = struct {
break;
} else |err| switch (err) {
error.WouldBlock => {
- self.manifest_file = try self.cache.manifest_dir.openFile(&manifest_file_path, .{
+ self.manifest_file = self.cache.manifest_dir.openFile(&manifest_file_path, .{
.mode = .read_write,
.lock = .shared,
- });
+ }) catch |e| {
+ self.diagnostic = .{ .manifest_create = e };
+ return error.CacheCheckFailed;
+ };
break;
},
- // There are no dir components, so you would think that this was
- // unreachable, however we have observed on macOS two processes racing
- // to do openat() with O_CREAT manifest in ENOENT.
- error.FileNotFound => continue,
- else => |e| return e,
+ error.FileNotFound => {
+ // There are no dir components, so the only possibility
+ // should be that the directory behind the handle has been
+ // deleted, however we have observed on macOS two processes
+ // racing to do openat() with O_CREAT manifest in ENOENT.
+ //
+ // As a workaround, we retry with exclusive=true which
+ // disambiguates by returning EEXIST, indicating original
+ // failure was a race, or ENOENT, indicating deletion of
+ // the directory of our open handle.
+ if (builtin.os.tag != .macos) {
+ self.diagnostic = .{ .manifest_create = error.FileNotFound };
+ return error.CacheCheckFailed;
+ }
+ if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
+ .read = true,
+ .truncate = false,
+ .lock = .exclusive,
+ .lock_nonblocking = self.want_shared_lock,
+ .exclusive = true,
+ })) |manifest_file| {
+ self.manifest_file = manifest_file;
+ self.have_exclusive_lock = true;
+ break;
+ } else |excl_err| switch (excl_err) {
+ error.WouldBlock, error.PathAlreadyExists => continue,
+ error.FileNotFound => {
+ self.diagnostic = .{ .manifest_create = error.FileNotFound };
+ return error.CacheCheckFailed;
+ },
+ else => |e| {
+ self.diagnostic = .{ .manifest_create = e };
+ return error.CacheCheckFailed;
+ },
+ }
+ },
+ else => |e| {
+ self.diagnostic = .{ .manifest_create = e };
+ return error.CacheCheckFailed;
+ },
}
}
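
The workaround above leans on exclusive-create semantics: with .exclusive = true, an existing file surfaces as error.PathAlreadyExists (so the earlier ENOENT was just a creation race), while error.FileNotFound can only mean the directory behind the handle is gone. Below is a stripped-down sketch of that probe, detached from the manifest logic; the dir and basename parameters and the function name are placeholders, not part of the diff.

    const std = @import("std");

    /// Sketch: classify an unexpected FileNotFound seen while creating a file
    /// inside `dir`. Returns true if it was only a creation race (retrying is
    /// appropriate), false if the directory itself has been deleted.
    fn enoentWasRace(dir: std.fs.Dir, basename: []const u8) !bool {
        const file = dir.createFile(basename, .{
            .read = true,
            .truncate = false,
            .exclusive = true, // O_EXCL forces EEXIST vs ENOENT, which disambiguates
        }) catch |err| switch (err) {
            error.PathAlreadyExists => return true, // another process won the race
            error.FileNotFound => return false, // the directory really is gone
            else => |e| return e,
        };
        file.close();
        return true; // we created the file ourselves, so the directory exists
    }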
@ -514,7 +576,14 @@ pub const Manifest = struct {
const input_file_count = self.files.entries.len;
while (true) : (self.unhit(bin_digest, input_file_count)) {
- const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
+ const file_contents = self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max) catch |err| switch (err) {
+ error.OutOfMemory => return error.OutOfMemory,
+ error.StreamTooLong => return error.OutOfMemory,
+ else => |e| {
+ self.diagnostic = .{ .manifest_read = e };
+ return error.CacheCheckFailed;
+ },
+ };
defer gpa.free(file_contents);
var any_file_changed = false;
@ -526,8 +595,11 @@ pub const Manifest = struct {
while (idx < input_file_count) : (idx += 1) {
const ch_file = &self.files.keys()[idx];
self.populateFileHash(ch_file) catch |err| {
- self.failed_file_index = idx;
- return err;
+ self.diagnostic = .{ .file_hash = .{
+ .file_index = idx,
+ .err = err,
+ } };
+ return error.CacheCheckFailed;
};
}
return false;
@ -605,13 +677,22 @@ pub const Manifest = struct {
if (try self.upgradeToExclusiveLock()) continue;
return false;
},
- else => return error.CacheUnavailable,
+ else => |e| {
+ self.diagnostic = .{ .file_open = .{
+ .file_index = idx,
+ .err = e,
+ } };
+ return error.CacheCheckFailed;
+ },
};
defer this_file.close();
const actual_stat = this_file.stat() catch |err| {
- self.failed_file_index = idx;
- return err;
+ self.diagnostic = .{ .file_stat = .{
+ .file_index = idx,
+ .err = err,
+ } };
+ return error.CacheCheckFailed;
};
const size_match = actual_stat.size == cache_hash_file.stat.size;
const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime;
@ -634,8 +715,11 @@ pub const Manifest = struct {
var actual_digest: BinDigest = undefined;
hashFile(this_file, &actual_digest) catch |err| {
- self.failed_file_index = idx;
- return err;
+ self.diagnostic = .{ .file_read = .{
+ .file_index = idx,
+ .err = err,
+ } };
+ return error.CacheCheckFailed;
};
if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
@ -662,17 +746,22 @@ pub const Manifest = struct {
if (try self.upgradeToExclusiveLock()) continue;
self.manifest_dirty = true;
while (idx < input_file_count) : (idx += 1) {
- const ch_file = &self.files.keys()[idx];
- self.populateFileHash(ch_file) catch |err| {
- self.failed_file_index = idx;
- return err;
+ self.populateFileHash(&self.files.keys()[idx]) catch |err| {
+ self.diagnostic = .{ .file_hash = .{
+ .file_index = idx,
+ .err = err,
+ } };
+ return error.CacheCheckFailed;
};
}
return false;
}
if (self.want_shared_lock) {
- try self.downgradeToSharedLock();
+ self.downgradeToSharedLock() catch |err| {
+ self.diagnostic = .{ .manifest_lock = err };
+ return error.CacheCheckFailed;
+ };
}
return true;
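
For context, the hunk above is the tail end of hit(); the sketch below shows where that call sits in a typical manifest round trip under the new error contract. This is a rough outline, not code from the diff, and the hashed inputs are invented for illustration.

    const std = @import("std");

    /// Sketch: one round trip through a `std.Build.Cache.Manifest`.
    fn buildWithCache(cache: *std.Build.Cache) !void {
        var man = cache.obtain();
        defer man.deinit();

        // Describe the inputs; changing anything hashed here invalidates the entry.
        man.hash.addBytes("illustrative input description");
        man.hash.add(@as(u32, 1)); // e.g. a format/version salt

        // Under the new contract this can only fail with OutOfMemory,
        // InvalidFormat, or CacheCheckFailed (details in man.diagnostic).
        const is_hit = try man.hit();
        const digest = man.final(); // hex digest, valid on hit and miss alike
        _ = digest;

        if (!is_hit) {
            // ... produce the outputs for this digest, then persist the entry:
            try man.writeManifest();
        }
    }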
@ -1010,7 +1099,7 @@ pub const Manifest = struct {
self.have_exclusive_lock = false;
}
- fn upgradeToExclusiveLock(self: *Manifest) !bool {
+ fn upgradeToExclusiveLock(self: *Manifest) error{CacheCheckFailed}!bool {
if (self.have_exclusive_lock) return false;
assert(self.manifest_file != null);
@ -1022,7 +1111,10 @@ pub const Manifest = struct {
// Here we intentionally have a period where the lock is released, in case there are
// other processes holding a shared lock.
manifest_file.unlock();
- try manifest_file.lock(.exclusive);
+ manifest_file.lock(.exclusive) catch |err| {
+ self.diagnostic = .{ .manifest_lock = err };
+ return error.CacheCheckFailed;
+ };
}
self.have_exclusive_lock = true;
return true;
@ -1132,7 +1224,7 @@ pub fn writeSmallFile(dir: fs.Dir, sub_path: []const u8, data: []const u8) !void
}
}
- fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void {
+ fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) fs.File.PReadError!void {
var buf: [1024]u8 = undefined;
var hasher = hasher_init;
var off: u64 = 0;

View File

@ -754,11 +754,24 @@ pub fn cacheHitAndWatch(s: *Step, man: *Build.Cache.Manifest) !bool {
return is_hit;
}
- fn failWithCacheError(s: *Step, man: *const Build.Cache.Manifest, err: anyerror) anyerror {
- const i = man.failed_file_index orelse return err;
- const pp = man.files.keys()[i].prefixed_path;
- const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
- return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });
+ fn failWithCacheError(s: *Step, man: *const Build.Cache.Manifest, err: Build.Cache.Manifest.HitError) error{ OutOfMemory, MakeFailed } {
+ switch (err) {
+ error.CacheCheckFailed => switch (man.diagnostic) {
+ .none => unreachable,
+ .manifest_create, .manifest_read, .manifest_lock => |e| return s.fail("failed to check cache: {s} {s}", .{
+ @tagName(man.diagnostic), @errorName(e),
+ }),
+ .file_open, .file_stat, .file_read, .file_hash => |op| {
+ const pp = man.files.keys()[op.file_index].prefixed_path;
+ const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
+ return s.fail("failed to check cache: '{s}{s}' {s} {s}", .{
+ prefix, pp.sub_path, @tagName(man.diagnostic), @errorName(op.err),
+ });
+ },
+ },
+ error.OutOfMemory => return error.OutOfMemory,
+ error.InvalidFormat => return s.fail("failed to check cache: invalid manifest file format", .{}),
+ }
}
/// Prefer `writeManifestAndWatch` unless you already added watch inputs
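
For build-step authors nothing changes on the happy path: obtain a manifest, ask the cache, do the work on a miss, write the manifest. The difference is that a failed check now surfaces as a formatted step failure (via failWithCacheError above) rather than a propagated anyerror. A hedged sketch of that flow inside a custom step follows; the helper name and hashed input are illustrative, and it assumes the existing Step.cacheHit wrapper.

    const std = @import("std");
    const Build = std.Build;
    const Step = Build.Step;

    /// Sketch of the cache dance inside a custom step's make(); the real work
    /// and output wiring are elided.
    fn doWorkCached(step: *Step) !void {
        const b = step.owner;

        var man = b.graph.cache.obtain();
        defer man.deinit();

        // Everything hashed here participates in the cache key.
        man.hash.addBytes("description of this step's inputs");

        // cacheHit() wraps Manifest.hit(); on CacheCheckFailed or InvalidFormat
        // it renders man.diagnostic into a readable step failure.
        if (try step.cacheHit(&man)) return; // outputs are already up to date

        // ... produce the outputs for man.final() here ...

        try man.writeManifest();
    }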

View File

@ -1537,6 +1537,12 @@ pub const OpenError = error{
ProcessFdQuotaExceeded,
SystemFdQuotaExceeded,
NoDevice,
+ /// Either:
+ /// * One of the path components does not exist.
+ /// * Cwd was used, but cwd has been deleted.
+ /// * The path associated with the open directory handle has been deleted.
+ /// * On macOS, multiple processes or threads raced to create the same file
+ /// with `O.EXCL` set to `false`.
FileNotFound,
/// The path exceeded `max_path_bytes` bytes.

View File

@ -2048,15 +2048,30 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
whole.cache_manifest = &man;
try addNonIncrementalStuffToCacheManifest(comp, arena, &man);
- const is_hit = man.hit() catch |err| {
- const i = man.failed_file_index orelse return err;
- const pp = man.files.keys()[i].prefixed_path;
- const prefix = man.cache.prefixes()[pp.prefix];
- return comp.setMiscFailure(
+ const is_hit = man.hit() catch |err| switch (err) {
+ error.CacheCheckFailed => switch (man.diagnostic) {
+ .none => unreachable,
+ .manifest_create, .manifest_read, .manifest_lock => |e| return comp.setMiscFailure(
+ .check_whole_cache,
+ "failed to check cache: {s} {s}",
+ .{ @tagName(man.diagnostic), @errorName(e) },
+ ),
+ .file_open, .file_stat, .file_read, .file_hash => |op| {
+ const pp = man.files.keys()[op.file_index].prefixed_path;
+ const prefix = man.cache.prefixes()[pp.prefix];
+ return comp.setMiscFailure(
+ .check_whole_cache,
+ "failed to check cache: '{}{s}' {s} {s}",
+ .{ prefix, pp.sub_path, @tagName(man.diagnostic), @errorName(op.err) },
+ );
+ },
+ },
+ error.OutOfMemory => return error.OutOfMemory,
+ error.InvalidFormat => return comp.setMiscFailure(
.check_whole_cache,
"unable to check cache: stat file '{}{s}' failed: {s}",
.{ prefix, pp.sub_path, @errorName(err) },
);
"failed to check cache: invalid manifest file format",
.{},
),
};
if (is_hit) {
// In this case the cache hit contains the full set of file system inputs. Nice!

View File

@ -135,10 +135,36 @@ pub fn astGenFile(
error.PipeBusy => unreachable, // it's not a pipe
error.NoDevice => unreachable, // it's not a pipe
error.WouldBlock => unreachable, // not asking for non-blocking I/O
- // There are no dir components, so you would think that this was
- // unreachable, however we have observed on macOS two processes racing
- // to do openat() with O_CREAT manifest in ENOENT.
- error.FileNotFound => continue,
+ error.FileNotFound => {
+ // There are no dir components, so the only possibility should
+ // be that the directory behind the handle has been deleted,
+ // however we have observed on macOS two processes racing to do
+ // openat() with O_CREAT manifest in ENOENT.
+ //
+ // As a workaround, we retry with exclusive=true which
+ // disambiguates by returning EEXIST, indicating original
+ // failure was a race, or ENOENT, indicating deletion of the
+ // directory of our open handle.
+ if (builtin.os.tag != .macos) {
+ std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
+ cache_directory,
+ });
+ }
+ break zir_dir.createFile(&hex_digest, .{
+ .read = true,
+ .truncate = false,
+ .lock = lock,
+ .exclusive = true,
+ }) catch |excl_err| switch (excl_err) {
+ error.PathAlreadyExists => continue,
+ error.FileNotFound => {
+ std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
+ cache_directory,
+ });
+ },
+ else => |e| return e,
+ };
+ },
else => |e| return e, // Retryable errors are handled at callsite.
};

View File

@ -768,7 +768,7 @@ pub const File = struct {
/// TODO audit this error set. most of these should be collapsed into one error,
/// and Diags.Flags should be updated to convey the meaning to the user.
pub const FlushError = error{
- CacheUnavailable,
+ CacheCheckFailed,
CurrentWorkingDirectoryUnlinked,
DivisionByZero,
DllImportLibraryNotFound,