stage1 caching system: detect problematic mtimes

closes #2045
This commit is contained in:
Andrew Kelley 2019-03-11 10:26:08 -04:00
parent fec4555476
commit 3a6f19de48
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
8 changed files with 189 additions and 70 deletions

View File

@ -158,8 +158,10 @@ static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) {
// Ported from std/base64.zig
static Error base64_decode(Slice<uint8_t> dest, Slice<uint8_t> source) {
assert(source.len % 4 == 0);
assert(dest.len == (source.len / 4) * 3);
if (source.len % 4 != 0)
return ErrorInvalidFormat;
if (dest.len != (source.len / 4) * 3)
return ErrorInvalidFormat;
// In Zig this is comptime computed. In C++ it's not worth it to do that.
uint8_t char_to_index[256];
@ -218,15 +220,41 @@ static Error hash_file(uint8_t *digest, OsFile handle, Buf *contents) {
}
}
// If the wall clock time, rounded to the same precision as the
// mtime, is equal to the mtime, then we cannot rely on this mtime
// yet. We will instead save an mtime value that indicates the hash
// must be unconditionally computed.
static bool is_problematic_timestamp(const OsTimeStamp *fs_clock) {
OsTimeStamp wall_clock = os_timestamp_calendar();
// First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock.
if (fs_clock->nsec == 0) {
wall_clock.nsec = 0;
if (fs_clock->sec == 0) {
wall_clock.sec = 0;
} else {
wall_clock.sec &= (-1ull) << ctzll(fs_clock->sec);
}
} else {
wall_clock.nsec &= (-1ull) << ctzll(fs_clock->nsec);
}
return wall_clock.nsec == fs_clock->nsec && wall_clock.sec == fs_clock->sec;
}
static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf, Buf *contents) {
Error err;
assert(chf->path != nullptr);
OsFile this_file;
if ((err = os_file_open_r(chf->path, &this_file, &chf->mtime)))
if ((err = os_file_open_r(chf->path, &this_file, &chf->attr)))
return err;
if (is_problematic_timestamp(&chf->attr.mtime)) {
chf->attr.mtime.sec = 0;
chf->attr.mtime.nsec = 0;
chf->attr.inode = 0;
}
if ((err = hash_file(chf->bin_digest, this_file, contents))) {
os_file_close(this_file);
return err;
@ -278,6 +306,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
size_t input_file_count = ch->files.length;
bool any_file_changed = false;
Error return_code = ErrorNone;
size_t file_i = 0;
SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n"));
for (;; file_i += 1) {
@ -299,7 +328,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48);
}
// caller can notice that out_digest is unmodified.
return ErrorNone;
return return_code;
} else if (!opt_line.is_some) {
break;
} else {
@ -312,57 +341,73 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
SplitIterator it = memSplit(opt_line.value, str(" "));
Optional<Slice<uint8_t>> opt_inode = SplitIterator_next(&it);
if (!opt_inode.is_some) {
return_code = ErrorInvalidFormat;
break;
}
chf->attr.inode = strtoull((const char *)opt_inode.value.ptr, nullptr, 10);
Optional<Slice<uint8_t>> opt_mtime_sec = SplitIterator_next(&it);
if (!opt_mtime_sec.is_some) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10);
chf->attr.mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10);
Optional<Slice<uint8_t>> opt_mtime_nsec = SplitIterator_next(&it);
if (!opt_mtime_nsec.is_some) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10);
chf->attr.mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10);
Optional<Slice<uint8_t>> opt_digest = SplitIterator_next(&it);
if (!opt_digest.is_some) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
Slice<uint8_t> file_path = SplitIterator_rest(&it);
if (file_path.len == 0) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
Buf *this_path = buf_create_from_slice(file_path);
if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) {
os_file_close(ch->manifest_file);
return ErrorInvalidFormat;
return_code = ErrorInvalidFormat;
break;
}
chf->path = this_path;
// if the mtime matches we can trust the digest
OsFile this_file;
OsTimeStamp actual_mtime;
if ((err = os_file_open_r(chf->path, &this_file, &actual_mtime))) {
OsFileAttr actual_attr;
if ((err = os_file_open_r(chf->path, &this_file, &actual_attr))) {
fprintf(stderr, "Unable to open %s\n: %s", buf_ptr(chf->path), err_str(err));
os_file_close(ch->manifest_file);
return ErrorCacheUnavailable;
}
if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) {
if (chf->attr.mtime.sec == actual_attr.mtime.sec &&
chf->attr.mtime.nsec == actual_attr.mtime.nsec &&
chf->attr.inode == actual_attr.inode)
{
os_file_close(this_file);
} else {
// we have to recompute the digest.
// later we'll rewrite the manifest with the new mtime/digest values
ch->manifest_dirty = true;
chf->mtime = actual_mtime;
chf->attr = actual_attr;
if (is_problematic_timestamp(&actual_attr.mtime)) {
chf->attr.mtime.sec = 0;
chf->attr.mtime.nsec = 0;
chf->attr.inode = 0;
}
uint8_t actual_digest[48];
if ((err = hash_file(actual_digest, this_file, nullptr))) {
@ -381,7 +426,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
blake2b_update(&ch->blake, chf->bin_digest, 48);
}
}
if (file_i < input_file_count || file_i == 0) {
if (file_i < input_file_count || file_i == 0 || return_code != ErrorNone) {
// manifest file is empty or missing entries, so this is a cache miss
ch->manifest_dirty = true;
for (; file_i < input_file_count; file_i += 1) {
@ -392,7 +437,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) {
return ErrorCacheUnavailable;
}
}
return ErrorNone;
return return_code;
}
// Cache Hit
return cache_final(ch, out_digest);
@ -499,8 +544,8 @@ static Error write_manifest_file(CacheHash *ch) {
for (size_t i = 0; i < ch->files.length; i += 1) {
CacheHashFile *chf = &ch->files.at(i);
base64_encode({encoded_digest, 64}, {chf->bin_digest, 48});
buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n",
chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path));
buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n",
chf->attr.inode, chf->attr.mtime.sec, chf->attr.mtime.nsec, encoded_digest, buf_ptr(chf->path));
}
if ((err = os_file_overwrite(ch->manifest_file, &contents)))
return err;

View File

@ -15,7 +15,7 @@ struct LinkLib;
struct CacheHashFile {
Buf *path;
OsTimeStamp mtime;
OsFileAttr attr;
uint8_t bin_digest[48];
Buf *contents;
};
@ -57,6 +57,8 @@ void cache_file_opt(CacheHash *ch, Buf *path);
// added any files before calling cache_hit. CacheHash::b64_digest becomes
// available for use after this call, even in the case of a miss, and it
// is a hash of the input parameters only.
// If this function returns ErrorInvalidFormat, that error may be treated
// as a cache miss.
Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest);
// If you did not get a cache hit, call this function for every file

View File

@ -7724,8 +7724,11 @@ static Error define_builtin_compile_vars(CodeGen *g) {
Buf digest = BUF_INIT;
buf_resize(&digest, 0);
if ((err = cache_hit(&cache_hash, &digest)))
return err;
if ((err = cache_hit(&cache_hash, &digest))) {
// Treat an invalid format error as a cache miss.
if (err != ErrorInvalidFormat)
return err;
}
// We should always get a cache hit because there are no
// files in the input hash.
@ -8342,12 +8345,14 @@ static void gen_c_object(CodeGen *g, Buf *self_exe_path, CFile *c_file) {
Buf digest = BUF_INIT;
buf_resize(&digest, 0);
if ((err = cache_hit(cache_hash, &digest))) {
if (err == ErrorCacheUnavailable) {
// already printed error
} else {
fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err));
if (err != ErrorInvalidFormat) {
if (err == ErrorCacheUnavailable) {
// already printed error
} else {
fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err));
}
exit(1);
}
exit(1);
}
bool is_cache_miss = (buf_len(&digest) == 0);
if (is_cache_miss) {
@ -8993,7 +8998,10 @@ void codegen_print_timing_report(CodeGen *g, FILE *f) {
}
void codegen_add_time_event(CodeGen *g, const char *name) {
g->timing_events.append({os_get_time(), name});
OsTimeStamp timestamp = os_timestamp_monotonic();
double seconds = (double)timestamp.sec;
seconds += ((double)timestamp.nsec) / 1000000000.0;
g->timing_events.append({seconds, name});
}
static void add_cache_pkg(CodeGen *g, CacheHash *ch, ZigPackage *pkg) {
@ -9090,8 +9098,10 @@ static Error check_cache(CodeGen *g, Buf *manifest_dir, Buf *digest) {
cache_list_of_file(ch, g->link_objects.items, g->link_objects.length);
buf_resize(digest, 0);
if ((err = cache_hit(ch, digest)))
return err;
if ((err = cache_hit(ch, digest))) {
if (err != ErrorInvalidFormat)
return err;
}
if (ch->manifest_file_path != nullptr) {
g->caches_to_release.append(ch);

View File

@ -75,8 +75,10 @@ Error get_compiler_id(Buf **result) {
cache_file(ch, &self_exe_path);
buf_resize(&saved_compiler_id, 0);
if ((err = cache_hit(ch, &saved_compiler_id)))
return err;
if ((err = cache_hit(ch, &saved_compiler_id))) {
if (err != ErrorInvalidFormat)
return err;
}
if (buf_len(&saved_compiler_id) != 0) {
cache_release(ch);
*result = &saved_compiler_id;

View File

@ -18734,8 +18734,10 @@ static IrInstruction *ir_analyze_instruction_c_import(IrAnalyze *ira, IrInstruct
Buf tmp_c_file_digest = BUF_INIT;
buf_resize(&tmp_c_file_digest, 0);
if ((err = cache_hit(cache_hash, &tmp_c_file_digest))) {
ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err)));
return ira->codegen->invalid_instruction;
if (err != ErrorInvalidFormat) {
ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err)));
return ira->codegen->invalid_instruction;
}
}
ira->codegen->caches_to_release.append(cache_hash);

View File

@ -71,8 +71,10 @@ typedef SSIZE_T ssize_t;
#if defined(ZIG_OS_WINDOWS)
static double win32_time_resolution;
static LARGE_INTEGER windows_perf_freq;
#elif defined(__MACH__)
static clock_serv_t cclock;
static clock_serv_t macos_calendar_clock;
static clock_serv_t macos_monotonic_clock;
#endif
#include <stdlib.h>
@ -1233,28 +1235,60 @@ Error os_rename(Buf *src_path, Buf *dest_path) {
return ErrorNone;
}
double os_get_time(void) {
#if defined(ZIG_OS_WINDOWS)
unsigned __int64 time;
QueryPerformanceCounter((LARGE_INTEGER*) &time);
return time * win32_time_resolution;
static void windows_filetime_to_os_timestamp(FILETIME *ft, OsTimeStamp *mtime) {
mtime->sec = (((ULONGLONG) ft->dwHighDateTime) << 32) + ft->dwLowDateTime;
mtime->nsec = 0;
}
#endif
OsTimeStamp os_timestamp_calendar(void) {
OsTimeStamp result;
#if defined(ZIG_OS_WINDOWS)
FILETIME ft;
GetSystemTimeAsFileTime(&ft);
windows_filetime_to_os_timestamp(&ft, &result);
#elif defined(__MACH__)
mach_timespec_t mts;
kern_return_t err = clock_get_time(cclock, &mts);
kern_return_t err = clock_get_time(macos_calendar_clock, &mts);
assert(!err);
double seconds = (double)mts.tv_sec;
seconds += ((double)mts.tv_nsec) / 1000000000.0;
result.sec = mts.tv_sec;
result.nsec = mts.tv_nsec;
#else
struct timespec tms;
clock_gettime(CLOCK_REALTIME, &tms);
return seconds;
result.sec = tms.tv_sec;
result.nsec = tms.tv_nsec;
#endif
return result;
}
OsTimeStamp os_timestamp_monotonic(void) {
OsTimeStamp result;
#if defined(ZIG_OS_WINDOWS)
LARGE_INTEGER counts;
QueryPerformanceCounter(&counts);
result.sec = counts / windows_perf_freq;
result.nsec = (counts % windows_perf_freq) * 1000000000u / windows_perf_freq;
#elif defined(__MACH__)
mach_timespec_t mts;
kern_return_t err = clock_get_time(macos_monotonic_clock, &mts);
assert(!err);
result.sec = mts.tv_sec;
result.nsec = mts.tv_nsec;
#else
struct timespec tms;
clock_gettime(CLOCK_MONOTONIC, &tms);
double seconds = (double)tms.tv_sec;
seconds += ((double)tms.tv_nsec) / 1000000000.0;
return seconds;
result.sec = tms.tv_sec;
result.nsec = tms.tv_nsec;
#endif
return result;
}
Error os_make_path(Buf *path) {
@ -1352,14 +1386,14 @@ int os_init(void) {
#if defined(ZIG_OS_WINDOWS)
_setmode(fileno(stdout), _O_BINARY);
_setmode(fileno(stderr), _O_BINARY);
unsigned __int64 frequency;
if (QueryPerformanceFrequency((LARGE_INTEGER*) &frequency)) {
win32_time_resolution = 1.0 / (double) frequency;
if (QueryPerformanceFrequency(&windows_perf_freq)) {
win32_time_resolution = 1.0 / (double) windows_perf_freq;
} else {
return ErrorSystemResources;
}
#elif defined(__MACH__)
host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock);
host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &macos_monotonic_clock);
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &macos_calendar_clock);
#endif
return 0;
}
@ -1780,7 +1814,7 @@ Error os_self_exe_shared_libs(ZigList<Buf *> &paths) {
#endif
}
Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
Error os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr) {
#if defined(ZIG_OS_WINDOWS)
// TODO use CreateFileW
HANDLE result = CreateFileA(buf_ptr(full_path), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
@ -1808,14 +1842,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
}
*out_file = result;
if (mtime != nullptr) {
FILETIME last_write_time;
if (!GetFileTime(result, nullptr, nullptr, &last_write_time)) {
if (attr != nullptr) {
BY_HANDLE_FILE_INFORMATION file_info;
if (!GetFileInformationByHandle(result, &file_info)) {
CloseHandle(result);
return ErrorUnexpected;
}
mtime->sec = (((ULONGLONG) last_write_time.dwHighDateTime) << 32) + last_write_time.dwLowDateTime;
mtime->nsec = 0;
windows_filetime_to_os_timestamp(&file_info.ftLastWriteTime, &attr->mtime);
attr->inode = (((uint64_t)file_info.nFileIndexHigh) << 32) | file_info.nFileIndexLow;
}
return ErrorNone;
@ -1851,13 +1885,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) {
}
*out_file = fd;
if (mtime != nullptr) {
if (attr != nullptr) {
attr->inode = statbuf.st_ino;
#if defined(ZIG_OS_DARWIN)
mtime->sec = statbuf.st_mtimespec.tv_sec;
mtime->nsec = statbuf.st_mtimespec.tv_nsec;
attr->mtime.sec = statbuf.st_mtimespec.tv_sec;
attr->mtime.nsec = statbuf.st_mtimespec.tv_nsec;
#else
mtime->sec = statbuf.st_mtim.tv_sec;
mtime->nsec = statbuf.st_mtim.tv_nsec;
attr->mtime.sec = statbuf.st_mtim.tv_sec;
attr->mtime.nsec = statbuf.st_mtim.tv_nsec;
#endif
}
return ErrorNone;

View File

@ -85,6 +85,11 @@ struct OsTimeStamp {
uint64_t nsec;
};
struct OsFileAttr {
OsTimeStamp mtime;
uint64_t inode;
};
int os_init(void);
void os_spawn_process(const char *exe, ZigList<const char *> &args, Termination *term);
@ -103,7 +108,7 @@ bool os_path_is_absolute(Buf *path);
Error ATTRIBUTE_MUST_USE os_make_path(Buf *path);
Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path);
Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime);
Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr);
Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file);
Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len);
Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents);
@ -126,7 +131,8 @@ Error os_delete_file(Buf *path);
Error ATTRIBUTE_MUST_USE os_file_exists(Buf *full_path, bool *result);
Error os_rename(Buf *src_path, Buf *dest_path);
double os_get_time(void);
OsTimeStamp os_timestamp_monotonic(void);
OsTimeStamp os_timestamp_calendar(void);
bool os_is_sep(uint8_t c);

View File

@ -63,10 +63,27 @@ static inline int clzll(unsigned long long mask) {
return 63 - lz;
#endif
}
static inline int ctzll(unsigned long long mask) {
unsigned long result;
#if defined(_WIN64)
if (_BitScanForward64(&result, mask))
return result;
zig_unreachable();
#else
if (_BitScanForward(&result, mask & 0xffffffff))
return result;
}
if (_BitScanForward(&result, mask >> 32))
return 32 + result;
zig_unreachable();
#endif
}
#else
#define clzll(x) __builtin_clzll(x)
#define ctzll(x) __builtin_ctzll(x)
#endif
template<typename T>
ATTRIBUTE_RETURNS_NOALIAS static inline T *allocate_nonzero(size_t count) {
#ifndef NDEBUG