std.Build.Cache.hit: work around macOS kernel bug

The previous commit cast doubt upon the initial report about macOS
kernel behavior, identifying another reason that ENOENT could be
returned from file creation.

However, it is demonstrable that ENOENT can be returned for both cases:
1. create file race
2. handle refers to deleted directory

This commit re-introduces the workaround for the file creation race on
macOS however it does not unconditionally retry - it first tries again
with O_EXCL to disambiguate the error condition that has occurred.
This commit is contained in:
Andrew Kelley 2024-12-10 20:44:00 -08:00
parent d37ee79535
commit 7ff42eff91
5 changed files with 69 additions and 8 deletions

View File

@ -528,6 +528,43 @@ pub const Manifest = struct {
}; };
break; break;
}, },
error.FileNotFound => {
// There are no dir components, so the only possibility
// should be that the directory behind the handle has been
// deleted, however we have observed on macOS two processes
// racing to do openat() with O_CREAT manifest in ENOENT.
//
// As a workaround, we retry with exclusive=true which
// disambiguates by returning EEXIST, indicating original
// failure was a race, or ENOENT, indicating deletion of
// the directory of our open handle.
if (builtin.os.tag != .macos) {
self.diagnostic = .{ .manifest_create = error.FileNotFound };
return error.CacheCheckFailed;
}
if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
.read = true,
.truncate = false,
.lock = .exclusive,
.lock_nonblocking = self.want_shared_lock,
.exclusive = true,
})) |manifest_file| {
self.manifest_file = manifest_file;
self.have_exclusive_lock = true;
break;
} else |excl_err| switch (excl_err) {
error.WouldBlock, error.PathAlreadyExists => continue,
error.FileNotFound => {
self.diagnostic = .{ .manifest_create = error.FileNotFound };
return error.CacheCheckFailed;
},
else => |e| {
self.diagnostic = .{ .manifest_create = e };
return error.CacheCheckFailed;
},
}
},
else => |e| { else => |e| {
self.diagnostic = .{ .manifest_create = e }; self.diagnostic = .{ .manifest_create = e };
return error.CacheCheckFailed; return error.CacheCheckFailed;

View File

@ -770,7 +770,7 @@ fn failWithCacheError(s: *Step, man: *const Build.Cache.Manifest, err: Build.Cac
}, },
}, },
error.OutOfMemory => return error.OutOfMemory, error.OutOfMemory => return error.OutOfMemory,
error.InvalidFormat => return s.fail("failed check cache: invalid manifest file format", .{}), error.InvalidFormat => return s.fail("failed to check cache: invalid manifest file format", .{}),
} }
} }

View File

@ -1541,6 +1541,8 @@ pub const OpenError = error{
/// * One of the path components does not exist. /// * One of the path components does not exist.
/// * Cwd was used, but cwd has been deleted. /// * Cwd was used, but cwd has been deleted.
/// * The path associated with the open directory handle has been deleted. /// * The path associated with the open directory handle has been deleted.
/// * On macOS, multiple processes or threads raced to create the same file
/// with `O.EXCL` set to `false`.
FileNotFound, FileNotFound,
/// The path exceeded `max_path_bytes` bytes. /// The path exceeded `max_path_bytes` bytes.

View File

@ -2069,7 +2069,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
error.OutOfMemory => return error.OutOfMemory, error.OutOfMemory => return error.OutOfMemory,
error.InvalidFormat => return comp.setMiscFailure( error.InvalidFormat => return comp.setMiscFailure(
.check_whole_cache, .check_whole_cache,
"failed check cache: invalid manifest file format", "failed to check cache: invalid manifest file format",
.{}, .{},
), ),
}; };

View File

@ -136,12 +136,34 @@ pub fn astGenFile(
error.NoDevice => unreachable, // it's not a pipe error.NoDevice => unreachable, // it's not a pipe
error.WouldBlock => unreachable, // not asking for non-blocking I/O error.WouldBlock => unreachable, // not asking for non-blocking I/O
error.FileNotFound => { error.FileNotFound => {
// Since there are no dir components this could only occur if // There are no dir components, so the only possibility should
// `zir_dir` is deleted after the compiler process obtains an // be that the directory behind the handle has been deleted,
// open directory handle. // however we have observed on macOS two processes racing to do
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{ // openat() with O_CREAT manifest in ENOENT.
cache_directory, //
}); // As a workaround, we retry with exclusive=true which
// disambiguates by returning EEXIST, indicating original
// failure was a race, or ENOENT, indicating deletion of the
// directory of our open handle.
if (builtin.os.tag != .macos) {
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
cache_directory,
});
}
break zir_dir.createFile(&hex_digest, .{
.read = true,
.truncate = false,
.lock = lock,
.exclusive = true,
}) catch |excl_err| switch (excl_err) {
error.PathAlreadyExists => continue,
error.FileNotFound => {
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
cache_directory,
});
},
else => |e| return e,
};
}, },
else => |e| return e, // Retryable errors are handled at callsite. else => |e| return e, // Retryable errors are handled at callsite.