mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
std.Build.Cache: use an array hash map for files
Rather than an ArrayList. Provides deduplication.
This commit is contained in:
parent
ebec7336e2
commit
2f4bbd6c63
@ -55,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory {
|
||||
|
||||
const PrefixedPath = struct {
|
||||
prefix: u8,
|
||||
sub_path: []u8,
|
||||
sub_path: []const u8,
|
||||
|
||||
fn eql(a: PrefixedPath, b: PrefixedPath) bool {
|
||||
return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
|
||||
}
|
||||
|
||||
fn hash(pp: PrefixedPath) u32 {
|
||||
return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
|
||||
}
|
||||
};
|
||||
|
||||
fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
|
||||
@ -132,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{
|
||||
});
|
||||
|
||||
pub const File = struct {
|
||||
prefixed_path: ?PrefixedPath,
|
||||
prefixed_path: PrefixedPath,
|
||||
max_file_size: ?usize,
|
||||
stat: Stat,
|
||||
bin_digest: BinDigest,
|
||||
@ -145,16 +153,18 @@ pub const File = struct {
|
||||
};
|
||||
|
||||
pub fn deinit(self: *File, gpa: Allocator) void {
|
||||
if (self.prefixed_path) |pp| {
|
||||
gpa.free(pp.sub_path);
|
||||
self.prefixed_path = null;
|
||||
}
|
||||
gpa.free(self.prefixed_path.sub_path);
|
||||
if (self.contents) |contents| {
|
||||
gpa.free(contents);
|
||||
self.contents = null;
|
||||
}
|
||||
self.* = undefined;
|
||||
}
|
||||
|
||||
pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
|
||||
const new = new_max_size orelse return;
|
||||
file.max_file_size = if (file.max_file_size) |old| @max(old, new) else new;
|
||||
}
|
||||
};
|
||||
|
||||
pub const HashHelper = struct {
|
||||
@ -296,7 +306,7 @@ pub const Manifest = struct {
|
||||
// order to obtain a problematic timestamp for the next call. Calls after that
|
||||
// will then use the same timestamp, to avoid unnecessary filesystem writes.
|
||||
want_refresh_timestamp: bool = true,
|
||||
files: std.ArrayListUnmanaged(File) = .{},
|
||||
files: Files = .{},
|
||||
hex_digest: HexDigest,
|
||||
/// Populated when hit() returns an error because of one
|
||||
/// of the files listed in the manifest.
|
||||
@ -305,6 +315,34 @@ pub const Manifest = struct {
|
||||
/// what time the file system thinks it is, according to its own granularity.
|
||||
recent_problematic_timestamp: i128 = 0,
|
||||
|
||||
pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);
|
||||
|
||||
pub const FilesContext = struct {
|
||||
pub fn hash(fc: FilesContext, file: File) u32 {
|
||||
_ = fc;
|
||||
return file.prefixed_path.hash();
|
||||
}
|
||||
|
||||
pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool {
|
||||
_ = fc;
|
||||
_ = b_index;
|
||||
return a.prefixed_path.eql(b.prefixed_path);
|
||||
}
|
||||
};
|
||||
|
||||
const FilesAdapter = struct {
|
||||
pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool {
|
||||
_ = context;
|
||||
_ = b_index;
|
||||
return a.eql(b.prefixed_path);
|
||||
}
|
||||
|
||||
pub fn hash(context: @This(), key: PrefixedPath) u32 {
|
||||
_ = context;
|
||||
return key.hash();
|
||||
}
|
||||
};
|
||||
|
||||
/// Add a file as a dependency of process being cached. When `hit` is
|
||||
/// called, the file's contents will be checked to ensure that it matches
|
||||
/// the contents from previous times.
|
||||
@ -317,7 +355,7 @@ pub const Manifest = struct {
|
||||
/// to access the contents of the file after calling `hit()` like so:
|
||||
///
|
||||
/// ```
|
||||
/// var file_contents = cache_hash.files.items[file_index].contents.?;
|
||||
/// var file_contents = cache_hash.files.keys()[file_index].contents.?;
|
||||
/// ```
|
||||
pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
|
||||
assert(self.manifest_file == null);
|
||||
@ -327,7 +365,12 @@ pub const Manifest = struct {
|
||||
const prefixed_path = try self.cache.findPrefix(file_path);
|
||||
errdefer gpa.free(prefixed_path.sub_path);
|
||||
|
||||
self.files.addOneAssumeCapacity().* = .{
|
||||
const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
|
||||
if (gop.found_existing) {
|
||||
gop.key_ptr.updateMaxSize(max_file_size);
|
||||
return gop.index;
|
||||
}
|
||||
gop.key_ptr.* = .{
|
||||
.prefixed_path = prefixed_path,
|
||||
.contents = null,
|
||||
.max_file_size = max_file_size,
|
||||
@ -338,7 +381,7 @@ pub const Manifest = struct {
|
||||
self.hash.add(prefixed_path.prefix);
|
||||
self.hash.addBytes(prefixed_path.sub_path);
|
||||
|
||||
return self.files.items.len - 1;
|
||||
return gop.index;
|
||||
}
|
||||
|
||||
pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
|
||||
@ -418,7 +461,7 @@ pub const Manifest = struct {
|
||||
|
||||
self.want_refresh_timestamp = true;
|
||||
|
||||
const input_file_count = self.files.items.len;
|
||||
const input_file_count = self.files.entries.len;
|
||||
while (true) : (self.unhit(bin_digest, input_file_count)) {
|
||||
const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
|
||||
defer gpa.free(file_contents);
|
||||
@ -430,7 +473,7 @@ pub const Manifest = struct {
|
||||
if (try self.upgradeToExclusiveLock()) continue;
|
||||
self.manifest_dirty = true;
|
||||
while (idx < input_file_count) : (idx += 1) {
|
||||
const ch_file = &self.files.items[idx];
|
||||
const ch_file = &self.files.keys()[idx];
|
||||
self.populateFileHash(ch_file) catch |err| {
|
||||
self.failed_file_index = idx;
|
||||
return err;
|
||||
@ -441,18 +484,6 @@ pub const Manifest = struct {
|
||||
while (line_iter.next()) |line| {
|
||||
defer idx += 1;
|
||||
|
||||
const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {
|
||||
const new = try self.files.addOne(gpa);
|
||||
new.* = .{
|
||||
.prefixed_path = null,
|
||||
.contents = null,
|
||||
.max_file_size = null,
|
||||
.stat = undefined,
|
||||
.bin_digest = undefined,
|
||||
};
|
||||
break :blk new;
|
||||
};
|
||||
|
||||
var iter = mem.tokenizeScalar(u8, line, ' ');
|
||||
const size = iter.next() orelse return error.InvalidFormat;
|
||||
const inode = iter.next() orelse return error.InvalidFormat;
|
||||
@ -461,30 +492,61 @@ pub const Manifest = struct {
|
||||
const prefix_str = iter.next() orelse return error.InvalidFormat;
|
||||
const file_path = iter.rest();
|
||||
|
||||
cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
|
||||
cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
|
||||
cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
|
||||
_ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
|
||||
const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
|
||||
const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
|
||||
const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
|
||||
const file_bin_digest = b: {
|
||||
if (digest_str.len != hex_digest_len) return error.InvalidFormat;
|
||||
var bd: BinDigest = undefined;
|
||||
_ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
|
||||
break :b bd;
|
||||
};
|
||||
|
||||
const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
|
||||
if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;
|
||||
|
||||
if (file_path.len == 0) {
|
||||
return error.InvalidFormat;
|
||||
}
|
||||
if (cache_hash_file.prefixed_path) |pp| {
|
||||
if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {
|
||||
return error.InvalidFormat;
|
||||
}
|
||||
}
|
||||
if (file_path.len == 0) return error.InvalidFormat;
|
||||
|
||||
if (cache_hash_file.prefixed_path == null) {
|
||||
cache_hash_file.prefixed_path = .{
|
||||
const cache_hash_file = f: {
|
||||
const prefixed_path: PrefixedPath = .{
|
||||
.prefix = prefix,
|
||||
.sub_path = try gpa.dupe(u8, file_path),
|
||||
.sub_path = file_path, // expires with file_contents
|
||||
};
|
||||
}
|
||||
if (idx < input_file_count) {
|
||||
const file = &self.files.keys()[idx];
|
||||
if (!file.prefixed_path.eql(prefixed_path))
|
||||
return error.InvalidFormat;
|
||||
|
||||
const pp = cache_hash_file.prefixed_path.?;
|
||||
file.stat = .{
|
||||
.size = stat_size,
|
||||
.inode = stat_inode,
|
||||
.mtime = stat_mtime,
|
||||
};
|
||||
file.bin_digest = file_bin_digest;
|
||||
break :f file;
|
||||
}
|
||||
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
|
||||
errdefer assert(self.files.popOrNull() != null);
|
||||
if (!gop.found_existing) {
|
||||
gop.key_ptr.* = .{
|
||||
.prefixed_path = .{
|
||||
.prefix = prefix,
|
||||
.sub_path = try gpa.dupe(u8, file_path),
|
||||
},
|
||||
.contents = null,
|
||||
.max_file_size = null,
|
||||
.stat = .{
|
||||
.size = stat_size,
|
||||
.inode = stat_inode,
|
||||
.mtime = stat_mtime,
|
||||
},
|
||||
.bin_digest = file_bin_digest,
|
||||
};
|
||||
}
|
||||
break :f gop.key_ptr;
|
||||
};
|
||||
|
||||
const pp = cache_hash_file.prefixed_path;
|
||||
const dir = self.cache.prefixes()[pp.prefix].handle;
|
||||
const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
|
||||
error.FileNotFound => {
|
||||
@ -548,7 +610,7 @@ pub const Manifest = struct {
|
||||
if (try self.upgradeToExclusiveLock()) continue;
|
||||
self.manifest_dirty = true;
|
||||
while (idx < input_file_count) : (idx += 1) {
|
||||
const ch_file = &self.files.items[idx];
|
||||
const ch_file = &self.files.keys()[idx];
|
||||
self.populateFileHash(ch_file) catch |err| {
|
||||
self.failed_file_index = idx;
|
||||
return err;
|
||||
@ -571,12 +633,12 @@ pub const Manifest = struct {
|
||||
self.hash.hasher.update(&bin_digest);
|
||||
|
||||
// Remove files not in the initial hash.
|
||||
for (self.files.items[input_file_count..]) |*file| {
|
||||
for (self.files.keys()[input_file_count..]) |*file| {
|
||||
file.deinit(self.cache.gpa);
|
||||
}
|
||||
self.files.shrinkRetainingCapacity(input_file_count);
|
||||
|
||||
for (self.files.items) |file| {
|
||||
for (self.files.keys()) |file| {
|
||||
self.hash.hasher.update(&file.bin_digest);
|
||||
}
|
||||
}
|
||||
@ -616,7 +678,7 @@ pub const Manifest = struct {
|
||||
}
|
||||
|
||||
fn populateFileHash(self: *Manifest, ch_file: *File) !void {
|
||||
const pp = ch_file.prefixed_path.?;
|
||||
const pp = ch_file.prefixed_path;
|
||||
const dir = self.cache.prefixes()[pp.prefix].handle;
|
||||
const file = try dir.openFile(pp.sub_path, .{});
|
||||
defer file.close();
|
||||
@ -682,7 +744,7 @@ pub const Manifest = struct {
|
||||
.bin_digest = undefined,
|
||||
.contents = null,
|
||||
};
|
||||
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
|
||||
errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1);
|
||||
|
||||
try self.populateFileHash(new_ch_file);
|
||||
|
||||
@ -690,9 +752,11 @@ pub const Manifest = struct {
|
||||
}
|
||||
|
||||
/// Add a file as a dependency of process being cached, after the initial hash has been
|
||||
/// calculated. This is useful for processes that don't know the all the files that
|
||||
/// are depended on ahead of time. For example, a source file that can import other files
|
||||
/// will need to be recompiled if the imported file is changed.
|
||||
/// calculated.
|
||||
///
|
||||
/// This is useful for processes that don't know the all the files that are
|
||||
/// depended on ahead of time. For example, a source file that can import
|
||||
/// other files will need to be recompiled if the imported file is changed.
|
||||
pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
|
||||
assert(self.manifest_file != null);
|
||||
|
||||
@ -700,17 +764,26 @@ pub const Manifest = struct {
|
||||
const prefixed_path = try self.cache.findPrefix(file_path);
|
||||
errdefer gpa.free(prefixed_path.sub_path);
|
||||
|
||||
const new_ch_file = try self.files.addOne(gpa);
|
||||
new_ch_file.* = .{
|
||||
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
|
||||
errdefer assert(self.files.popOrNull() != null);
|
||||
|
||||
if (gop.found_existing) {
|
||||
gpa.free(prefixed_path.sub_path);
|
||||
return;
|
||||
}
|
||||
|
||||
gop.key_ptr.* = .{
|
||||
.prefixed_path = prefixed_path,
|
||||
.max_file_size = null,
|
||||
.stat = undefined,
|
||||
.bin_digest = undefined,
|
||||
.contents = null,
|
||||
};
|
||||
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
|
||||
|
||||
try self.populateFileHash(new_ch_file);
|
||||
self.files.lockPointers();
|
||||
defer self.files.unlockPointers();
|
||||
|
||||
try self.populateFileHash(gop.key_ptr);
|
||||
}
|
||||
|
||||
/// Like `addFilePost` but when the file contents have already been loaded from disk.
|
||||
@ -724,13 +797,20 @@ pub const Manifest = struct {
|
||||
assert(self.manifest_file != null);
|
||||
const gpa = self.cache.gpa;
|
||||
|
||||
const ch_file = try self.files.addOne(gpa);
|
||||
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
|
||||
|
||||
const prefixed_path = try self.cache.findPrefixResolved(resolved_path);
|
||||
errdefer gpa.free(prefixed_path.sub_path);
|
||||
|
||||
ch_file.* = .{
|
||||
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
|
||||
errdefer assert(self.files.popOrNull() != null);
|
||||
|
||||
if (gop.found_existing) {
|
||||
gpa.free(prefixed_path.sub_path);
|
||||
return;
|
||||
}
|
||||
|
||||
const new_file = gop.key_ptr;
|
||||
|
||||
new_file.* = .{
|
||||
.prefixed_path = prefixed_path,
|
||||
.max_file_size = null,
|
||||
.stat = stat,
|
||||
@ -738,19 +818,19 @@ pub const Manifest = struct {
|
||||
.contents = null,
|
||||
};
|
||||
|
||||
if (self.isProblematicTimestamp(ch_file.stat.mtime)) {
|
||||
if (self.isProblematicTimestamp(new_file.stat.mtime)) {
|
||||
// The actual file has an unreliable timestamp, force it to be hashed
|
||||
ch_file.stat.mtime = 0;
|
||||
ch_file.stat.inode = 0;
|
||||
new_file.stat.mtime = 0;
|
||||
new_file.stat.inode = 0;
|
||||
}
|
||||
|
||||
{
|
||||
var hasher = hasher_init;
|
||||
hasher.update(bytes);
|
||||
hasher.final(&ch_file.bin_digest);
|
||||
hasher.final(&new_file.bin_digest);
|
||||
}
|
||||
|
||||
self.hash.hasher.update(&ch_file.bin_digest);
|
||||
self.hash.hasher.update(&new_file.bin_digest);
|
||||
}
|
||||
|
||||
pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {
|
||||
@ -816,14 +896,14 @@ pub const Manifest = struct {
|
||||
|
||||
const writer = contents.writer();
|
||||
try writer.writeAll(manifest_header ++ "\n");
|
||||
for (self.files.items) |file| {
|
||||
for (self.files.keys()) |file| {
|
||||
try writer.print("{d} {d} {d} {} {d} {s}\n", .{
|
||||
file.stat.size,
|
||||
file.stat.inode,
|
||||
file.stat.mtime,
|
||||
fmt.fmtSliceHexLower(&file.bin_digest),
|
||||
file.prefixed_path.?.prefix,
|
||||
file.prefixed_path.?.sub_path,
|
||||
file.prefixed_path.prefix,
|
||||
file.prefixed_path.sub_path,
|
||||
});
|
||||
}
|
||||
|
||||
@ -892,7 +972,7 @@ pub const Manifest = struct {
|
||||
|
||||
file.close();
|
||||
}
|
||||
for (self.files.items) |*file| {
|
||||
for (self.files.keys()) |*file| {
|
||||
file.deinit(self.cache.gpa);
|
||||
}
|
||||
self.files.deinit(self.cache.gpa);
|
||||
@ -1061,7 +1141,7 @@ test "check that changing a file makes cache fail" {
|
||||
// There should be nothing in the cache
|
||||
try testing.expectEqual(false, try ch.hit());
|
||||
|
||||
try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));
|
||||
try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));
|
||||
|
||||
digest1 = ch.final();
|
||||
|
||||
@ -1081,7 +1161,7 @@ test "check that changing a file makes cache fail" {
|
||||
try testing.expectEqual(false, try ch.hit());
|
||||
|
||||
// The cache system does not keep the contents of re-hashed input files.
|
||||
try testing.expect(ch.files.items[temp_file_idx].contents == null);
|
||||
try testing.expect(ch.files.keys()[temp_file_idx].contents == null);
|
||||
|
||||
digest2 = ch.final();
|
||||
|
||||
|
||||
@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool {
|
||||
|
||||
fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror {
|
||||
const i = man.failed_file_index orelse return err;
|
||||
const pp = man.files.items[i].prefixed_path orelse return err;
|
||||
const pp = man.files.keys()[i].prefixed_path;
|
||||
const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
|
||||
return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });
|
||||
}
|
||||
|
||||
@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void
|
||||
|
||||
const is_hit = man.hit() catch |err| {
|
||||
const i = man.failed_file_index orelse return err;
|
||||
const pp = man.files.items[i].prefixed_path orelse return err;
|
||||
const pp = man.files.keys()[i].prefixed_path;
|
||||
const prefix = man.cache.prefixes()[pp.prefix];
|
||||
return comp.setMiscFailure(
|
||||
.check_whole_cache,
|
||||
@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module
|
||||
const prev_hash_state = man.hash.peekBin();
|
||||
const actual_hit = hit: {
|
||||
_ = try man.hit();
|
||||
if (man.files.items.len == 0) {
|
||||
if (man.files.entries.len == 0) {
|
||||
man.unhit(prev_hash_state, 0);
|
||||
break :hit false;
|
||||
}
|
||||
|
||||
@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo
|
||||
};
|
||||
defer o_directory.handle.close();
|
||||
|
||||
const abilists_contents = man.files.items[abilists_index].contents.?;
|
||||
const abilists_contents = man.files.keys()[abilists_index].contents.?;
|
||||
const metadata = try loadMetaData(comp.gpa, abilists_contents);
|
||||
defer metadata.destroy(comp.gpa);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user