From 3a6f19de48366a616eaffd9dd6c4d4712e0b6c27 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 11 Mar 2019 10:26:08 -0400 Subject: [PATCH] stage1 caching system: detect problematic mtimes closes #2045 --- src/cache_hash.cpp | 97 +++++++++++++++++++++++++++++++++------------- src/cache_hash.hpp | 4 +- src/codegen.cpp | 30 +++++++++----- src/compiler.cpp | 6 ++- src/ir.cpp | 6 ++- src/os.cpp | 89 +++++++++++++++++++++++++++++------------- src/os.hpp | 10 ++++- src/util.hpp | 17 ++++++++ 8 files changed, 189 insertions(+), 70 deletions(-) diff --git a/src/cache_hash.cpp b/src/cache_hash.cpp index 6c9cf12962..2f5cdde961 100644 --- a/src/cache_hash.cpp +++ b/src/cache_hash.cpp @@ -158,8 +158,10 @@ static void base64_encode(Slice dest, Slice source) { // Ported from std/base64.zig static Error base64_decode(Slice dest, Slice source) { - assert(source.len % 4 == 0); - assert(dest.len == (source.len / 4) * 3); + if (source.len % 4 != 0) + return ErrorInvalidFormat; + if (dest.len != (source.len / 4) * 3) + return ErrorInvalidFormat; // In Zig this is comptime computed. In C++ it's not worth it to do that. uint8_t char_to_index[256]; @@ -218,15 +220,41 @@ static Error hash_file(uint8_t *digest, OsFile handle, Buf *contents) { } } +// If the wall clock time, rounded to the same precision as the +// mtime, is equal to the mtime, then we cannot rely on this mtime +// yet. We will instead save an mtime value that indicates the hash +// must be unconditionally computed. +static bool is_problematic_timestamp(const OsTimeStamp *fs_clock) { + OsTimeStamp wall_clock = os_timestamp_calendar(); + // First make all the least significant zero bits in the fs_clock, also zero bits in the wall clock. + if (fs_clock->nsec == 0) { + wall_clock.nsec = 0; + if (fs_clock->sec == 0) { + wall_clock.sec = 0; + } else { + wall_clock.sec &= (-1ull) << ctzll(fs_clock->sec); + } + } else { + wall_clock.nsec &= (-1ull) << ctzll(fs_clock->nsec); + } + return wall_clock.nsec == fs_clock->nsec && wall_clock.sec == fs_clock->sec; +} + static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf, Buf *contents) { Error err; assert(chf->path != nullptr); OsFile this_file; - if ((err = os_file_open_r(chf->path, &this_file, &chf->mtime))) + if ((err = os_file_open_r(chf->path, &this_file, &chf->attr))) return err; + if (is_problematic_timestamp(&chf->attr.mtime)) { + chf->attr.mtime.sec = 0; + chf->attr.mtime.nsec = 0; + chf->attr.inode = 0; + } + if ((err = hash_file(chf->bin_digest, this_file, contents))) { os_file_close(this_file); return err; @@ -278,6 +306,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) { size_t input_file_count = ch->files.length; bool any_file_changed = false; + Error return_code = ErrorNone; size_t file_i = 0; SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n")); for (;; file_i += 1) { @@ -299,7 +328,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) { blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48); } // caller can notice that out_digest is unmodified. - return ErrorNone; + return return_code; } else if (!opt_line.is_some) { break; } else { @@ -312,57 +341,73 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) { SplitIterator it = memSplit(opt_line.value, str(" ")); + Optional> opt_inode = SplitIterator_next(&it); + if (!opt_inode.is_some) { + return_code = ErrorInvalidFormat; + break; + } + chf->attr.inode = strtoull((const char *)opt_inode.value.ptr, nullptr, 10); + Optional> opt_mtime_sec = SplitIterator_next(&it); if (!opt_mtime_sec.is_some) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } - chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10); + chf->attr.mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10); Optional> opt_mtime_nsec = SplitIterator_next(&it); if (!opt_mtime_nsec.is_some) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } - chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10); + chf->attr.mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10); Optional> opt_digest = SplitIterator_next(&it); if (!opt_digest.is_some) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } Slice file_path = SplitIterator_rest(&it); if (file_path.len == 0) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } Buf *this_path = buf_create_from_slice(file_path); if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) { - os_file_close(ch->manifest_file); - return ErrorInvalidFormat; + return_code = ErrorInvalidFormat; + break; } chf->path = this_path; // if the mtime matches we can trust the digest OsFile this_file; - OsTimeStamp actual_mtime; - if ((err = os_file_open_r(chf->path, &this_file, &actual_mtime))) { + OsFileAttr actual_attr; + if ((err = os_file_open_r(chf->path, &this_file, &actual_attr))) { fprintf(stderr, "Unable to open %s\n: %s", buf_ptr(chf->path), err_str(err)); os_file_close(ch->manifest_file); return ErrorCacheUnavailable; } - if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) { + if (chf->attr.mtime.sec == actual_attr.mtime.sec && + chf->attr.mtime.nsec == actual_attr.mtime.nsec && + chf->attr.inode == actual_attr.inode) + { os_file_close(this_file); } else { // we have to recompute the digest. // later we'll rewrite the manifest with the new mtime/digest values ch->manifest_dirty = true; - chf->mtime = actual_mtime; + chf->attr = actual_attr; + + if (is_problematic_timestamp(&actual_attr.mtime)) { + chf->attr.mtime.sec = 0; + chf->attr.mtime.nsec = 0; + chf->attr.inode = 0; + } uint8_t actual_digest[48]; if ((err = hash_file(actual_digest, this_file, nullptr))) { @@ -381,7 +426,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) { blake2b_update(&ch->blake, chf->bin_digest, 48); } } - if (file_i < input_file_count || file_i == 0) { + if (file_i < input_file_count || file_i == 0 || return_code != ErrorNone) { // manifest file is empty or missing entries, so this is a cache miss ch->manifest_dirty = true; for (; file_i < input_file_count; file_i += 1) { @@ -392,7 +437,7 @@ Error cache_hit(CacheHash *ch, Buf *out_digest) { return ErrorCacheUnavailable; } } - return ErrorNone; + return return_code; } // Cache Hit return cache_final(ch, out_digest); @@ -499,8 +544,8 @@ static Error write_manifest_file(CacheHash *ch) { for (size_t i = 0; i < ch->files.length; i += 1) { CacheHashFile *chf = &ch->files.at(i); base64_encode({encoded_digest, 64}, {chf->bin_digest, 48}); - buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n", - chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path)); + buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n", + chf->attr.inode, chf->attr.mtime.sec, chf->attr.mtime.nsec, encoded_digest, buf_ptr(chf->path)); } if ((err = os_file_overwrite(ch->manifest_file, &contents))) return err; diff --git a/src/cache_hash.hpp b/src/cache_hash.hpp index 5be02ea405..e2a10270f3 100644 --- a/src/cache_hash.hpp +++ b/src/cache_hash.hpp @@ -15,7 +15,7 @@ struct LinkLib; struct CacheHashFile { Buf *path; - OsTimeStamp mtime; + OsFileAttr attr; uint8_t bin_digest[48]; Buf *contents; }; @@ -57,6 +57,8 @@ void cache_file_opt(CacheHash *ch, Buf *path); // added any files before calling cache_hit. CacheHash::b64_digest becomes // available for use after this call, even in the case of a miss, and it // is a hash of the input parameters only. +// If this function returns ErrorInvalidFormat, that error may be treated +// as a cache miss. Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest); // If you did not get a cache hit, call this function for every file diff --git a/src/codegen.cpp b/src/codegen.cpp index 71f9f0217b..332080c2f5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -7724,8 +7724,11 @@ static Error define_builtin_compile_vars(CodeGen *g) { Buf digest = BUF_INIT; buf_resize(&digest, 0); - if ((err = cache_hit(&cache_hash, &digest))) - return err; + if ((err = cache_hit(&cache_hash, &digest))) { + // Treat an invalid format error as a cache miss. + if (err != ErrorInvalidFormat) + return err; + } // We should always get a cache hit because there are no // files in the input hash. @@ -8342,12 +8345,14 @@ static void gen_c_object(CodeGen *g, Buf *self_exe_path, CFile *c_file) { Buf digest = BUF_INIT; buf_resize(&digest, 0); if ((err = cache_hit(cache_hash, &digest))) { - if (err == ErrorCacheUnavailable) { - // already printed error - } else { - fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err)); + if (err != ErrorInvalidFormat) { + if (err == ErrorCacheUnavailable) { + // already printed error + } else { + fprintf(stderr, "unable to check cache when compiling C object: %s\n", err_str(err)); + } + exit(1); } - exit(1); } bool is_cache_miss = (buf_len(&digest) == 0); if (is_cache_miss) { @@ -8993,7 +8998,10 @@ void codegen_print_timing_report(CodeGen *g, FILE *f) { } void codegen_add_time_event(CodeGen *g, const char *name) { - g->timing_events.append({os_get_time(), name}); + OsTimeStamp timestamp = os_timestamp_monotonic(); + double seconds = (double)timestamp.sec; + seconds += ((double)timestamp.nsec) / 1000000000.0; + g->timing_events.append({seconds, name}); } static void add_cache_pkg(CodeGen *g, CacheHash *ch, ZigPackage *pkg) { @@ -9090,8 +9098,10 @@ static Error check_cache(CodeGen *g, Buf *manifest_dir, Buf *digest) { cache_list_of_file(ch, g->link_objects.items, g->link_objects.length); buf_resize(digest, 0); - if ((err = cache_hit(ch, digest))) - return err; + if ((err = cache_hit(ch, digest))) { + if (err != ErrorInvalidFormat) + return err; + } if (ch->manifest_file_path != nullptr) { g->caches_to_release.append(ch); diff --git a/src/compiler.cpp b/src/compiler.cpp index fa153aa2d8..7ec1485fad 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -75,8 +75,10 @@ Error get_compiler_id(Buf **result) { cache_file(ch, &self_exe_path); buf_resize(&saved_compiler_id, 0); - if ((err = cache_hit(ch, &saved_compiler_id))) - return err; + if ((err = cache_hit(ch, &saved_compiler_id))) { + if (err != ErrorInvalidFormat) + return err; + } if (buf_len(&saved_compiler_id) != 0) { cache_release(ch); *result = &saved_compiler_id; diff --git a/src/ir.cpp b/src/ir.cpp index a8771285f6..bd5944d922 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -18734,8 +18734,10 @@ static IrInstruction *ir_analyze_instruction_c_import(IrAnalyze *ira, IrInstruct Buf tmp_c_file_digest = BUF_INIT; buf_resize(&tmp_c_file_digest, 0); if ((err = cache_hit(cache_hash, &tmp_c_file_digest))) { - ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err))); - return ira->codegen->invalid_instruction; + if (err != ErrorInvalidFormat) { + ir_add_error_node(ira, node, buf_sprintf("C import failed: unable to check cache: %s", err_str(err))); + return ira->codegen->invalid_instruction; + } } ira->codegen->caches_to_release.append(cache_hash); diff --git a/src/os.cpp b/src/os.cpp index d5195c92d8..8a636f8b62 100644 --- a/src/os.cpp +++ b/src/os.cpp @@ -71,8 +71,10 @@ typedef SSIZE_T ssize_t; #if defined(ZIG_OS_WINDOWS) static double win32_time_resolution; +static LARGE_INTEGER windows_perf_freq; #elif defined(__MACH__) -static clock_serv_t cclock; +static clock_serv_t macos_calendar_clock; +static clock_serv_t macos_monotonic_clock; #endif #include @@ -1233,28 +1235,60 @@ Error os_rename(Buf *src_path, Buf *dest_path) { return ErrorNone; } -double os_get_time(void) { #if defined(ZIG_OS_WINDOWS) - unsigned __int64 time; - QueryPerformanceCounter((LARGE_INTEGER*) &time); - return time * win32_time_resolution; +static void windows_filetime_to_os_timestamp(FILETIME *ft, OsTimeStamp *mtime) { + mtime->sec = (((ULONGLONG) ft->dwHighDateTime) << 32) + ft->dwLowDateTime; + mtime->nsec = 0; +} +#endif + +OsTimeStamp os_timestamp_calendar(void) { + OsTimeStamp result; +#if defined(ZIG_OS_WINDOWS) + FILETIME ft; + GetSystemTimeAsFileTime(&ft); + windows_filetime_to_os_timestamp(&ft, &result); #elif defined(__MACH__) mach_timespec_t mts; - kern_return_t err = clock_get_time(cclock, &mts); + kern_return_t err = clock_get_time(macos_calendar_clock, &mts); assert(!err); - double seconds = (double)mts.tv_sec; - seconds += ((double)mts.tv_nsec) / 1000000000.0; + result.sec = mts.tv_sec; + result.nsec = mts.tv_nsec; +#else + struct timespec tms; + clock_gettime(CLOCK_REALTIME, &tms); - return seconds; + result.sec = tms.tv_sec; + result.nsec = tms.tv_nsec; +#endif + return result; +} + +OsTimeStamp os_timestamp_monotonic(void) { + OsTimeStamp result; +#if defined(ZIG_OS_WINDOWS) + LARGE_INTEGER counts; + QueryPerformanceCounter(&counts); + result.sec = counts / windows_perf_freq; + result.nsec = (counts % windows_perf_freq) * 1000000000u / windows_perf_freq; +#elif defined(__MACH__) + mach_timespec_t mts; + + kern_return_t err = clock_get_time(macos_monotonic_clock, &mts); + assert(!err); + + result.sec = mts.tv_sec; + result.nsec = mts.tv_nsec; #else struct timespec tms; clock_gettime(CLOCK_MONOTONIC, &tms); - double seconds = (double)tms.tv_sec; - seconds += ((double)tms.tv_nsec) / 1000000000.0; - return seconds; + + result.sec = tms.tv_sec; + result.nsec = tms.tv_nsec; #endif + return result; } Error os_make_path(Buf *path) { @@ -1352,14 +1386,14 @@ int os_init(void) { #if defined(ZIG_OS_WINDOWS) _setmode(fileno(stdout), _O_BINARY); _setmode(fileno(stderr), _O_BINARY); - unsigned __int64 frequency; - if (QueryPerformanceFrequency((LARGE_INTEGER*) &frequency)) { - win32_time_resolution = 1.0 / (double) frequency; + if (QueryPerformanceFrequency(&windows_perf_freq)) { + win32_time_resolution = 1.0 / (double) windows_perf_freq; } else { return ErrorSystemResources; } #elif defined(__MACH__) - host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); + host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &macos_monotonic_clock); + host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &macos_calendar_clock); #endif return 0; } @@ -1780,7 +1814,7 @@ Error os_self_exe_shared_libs(ZigList &paths) { #endif } -Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) { +Error os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr) { #if defined(ZIG_OS_WINDOWS) // TODO use CreateFileW HANDLE result = CreateFileA(buf_ptr(full_path), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); @@ -1808,14 +1842,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) { } *out_file = result; - if (mtime != nullptr) { - FILETIME last_write_time; - if (!GetFileTime(result, nullptr, nullptr, &last_write_time)) { + if (attr != nullptr) { + BY_HANDLE_FILE_INFORMATION file_info; + if (!GetFileInformationByHandle(result, &file_info)) { CloseHandle(result); return ErrorUnexpected; } - mtime->sec = (((ULONGLONG) last_write_time.dwHighDateTime) << 32) + last_write_time.dwLowDateTime; - mtime->nsec = 0; + windows_filetime_to_os_timestamp(&file_info.ftLastWriteTime, &attr->mtime); + attr->inode = (((uint64_t)file_info.nFileIndexHigh) << 32) | file_info.nFileIndexLow; } return ErrorNone; @@ -1851,13 +1885,14 @@ Error os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime) { } *out_file = fd; - if (mtime != nullptr) { + if (attr != nullptr) { + attr->inode = statbuf.st_ino; #if defined(ZIG_OS_DARWIN) - mtime->sec = statbuf.st_mtimespec.tv_sec; - mtime->nsec = statbuf.st_mtimespec.tv_nsec; + attr->mtime.sec = statbuf.st_mtimespec.tv_sec; + attr->mtime.nsec = statbuf.st_mtimespec.tv_nsec; #else - mtime->sec = statbuf.st_mtim.tv_sec; - mtime->nsec = statbuf.st_mtim.tv_nsec; + attr->mtime.sec = statbuf.st_mtim.tv_sec; + attr->mtime.nsec = statbuf.st_mtim.tv_nsec; #endif } return ErrorNone; diff --git a/src/os.hpp b/src/os.hpp index 916158b4f3..36d3c327a8 100644 --- a/src/os.hpp +++ b/src/os.hpp @@ -85,6 +85,11 @@ struct OsTimeStamp { uint64_t nsec; }; +struct OsFileAttr { + OsTimeStamp mtime; + uint64_t inode; +}; + int os_init(void); void os_spawn_process(const char *exe, ZigList &args, Termination *term); @@ -103,7 +108,7 @@ bool os_path_is_absolute(Buf *path); Error ATTRIBUTE_MUST_USE os_make_path(Buf *path); Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path); -Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsTimeStamp *mtime); +Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file, OsFileAttr *attr); Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file); Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len); Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents); @@ -126,7 +131,8 @@ Error os_delete_file(Buf *path); Error ATTRIBUTE_MUST_USE os_file_exists(Buf *full_path, bool *result); Error os_rename(Buf *src_path, Buf *dest_path); -double os_get_time(void); +OsTimeStamp os_timestamp_monotonic(void); +OsTimeStamp os_timestamp_calendar(void); bool os_is_sep(uint8_t c); diff --git a/src/util.hpp b/src/util.hpp index abf4d37c88..64c85033e3 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -63,10 +63,27 @@ static inline int clzll(unsigned long long mask) { return 63 - lz; #endif } +static inline int ctzll(unsigned long long mask) { + unsigned long result; +#if defined(_WIN64) + if (_BitScanForward64(&result, mask)) + return result; + zig_unreachable(); +#else + if (_BitScanForward(&result, mask & 0xffffffff)) + return result; + } + if (_BitScanForward(&result, mask >> 32)) + return 32 + result; + zig_unreachable(); +#endif +} #else #define clzll(x) __builtin_clzll(x) +#define ctzll(x) __builtin_ctzll(x) #endif + template ATTRIBUTE_RETURNS_NOALIAS static inline T *allocate_nonzero(size_t count) { #ifndef NDEBUG