stage2: implement @embedFile

This commit is contained in:
Andrew Kelley 2021-10-17 18:57:54 -07:00
parent ad17108bdd
commit e5dac0a0b3
3 changed files with 275 additions and 8 deletions

View File

@ -55,6 +55,10 @@ c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
/// since the last compilation, as well as scan for `@import` and queue up
/// additional jobs corresponding to those new files.
astgen_work_queue: std.fifo.LinearFifo(*Module.File, .Dynamic),
/// These jobs are to inspect the file system stat() and if the embedded file has changed
/// on disk, mark the corresponding Decl outdated and queue up an `analyze_decl`
/// task for it.
embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic),
/// The ErrorMsg memory is owned by the `CObject`, using Compilation's general purpose allocator.
/// This data is accessed by multiple threads and is protected by `mutex`.
@ -181,6 +185,10 @@ const Job = union(enum) {
/// It may have already been analyzed, or it may have been determined
/// to be outdated; in this case perform semantic analysis again.
analyze_decl: *Module.Decl,
/// The file that was loaded with `@embedFile` has changed on disk
/// and has been re-loaded into memory. All Decls that depend on it
/// need to be re-analyzed.
update_embed_file: *Module.EmbedFile,
/// The source file containing the Decl has been updated, and so the
/// Decl may need its line number information updated in the debug info.
update_line_number: *Module.Decl,
@ -1447,6 +1455,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
.work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
.c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
.astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa),
.embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa),
.keep_source_files_loaded = options.keep_source_files_loaded,
.use_clang = use_clang,
.clang_argv = options.clang_argv,
@ -1632,6 +1641,7 @@ pub fn destroy(self: *Compilation) void {
self.work_queue.deinit();
self.c_object_work_queue.deinit();
self.astgen_work_queue.deinit();
self.embed_file_work_queue.deinit();
{
var it = self.crt_files.iterator();
@ -1747,6 +1757,16 @@ pub fn update(self: *Compilation) !void {
}
if (!use_stage1) {
// Put a work item in for checking if any files used with `@embedFile` changed.
{
try self.embed_file_work_queue.ensureUnusedCapacity(module.embed_table.count());
var it = module.embed_table.iterator();
while (it.next()) |entry| {
const embed_file = entry.value_ptr.*;
self.embed_file_work_queue.writeItemAssumeCapacity(embed_file);
}
}
try self.work_queue.writeItem(.{ .analyze_pkg = std_pkg });
if (self.bin_file.options.is_test) {
try self.work_queue.writeItem(.{ .analyze_pkg = module.main_pkg });
@ -1870,6 +1890,7 @@ pub fn totalErrorCount(self: *Compilation) usize {
if (self.bin_file.options.module) |module| {
total += module.failed_exports.count();
total += module.failed_embed_files.count();
{
var it = module.failed_files.iterator();
@ -1966,6 +1987,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !AllErrors {
}
}
}
{
var it = module.failed_embed_files.iterator();
while (it.next()) |entry| {
const msg = entry.value_ptr.*;
try AllErrors.add(module, &arena, &errors, msg.*);
}
}
{
var it = module.failed_decls.iterator();
while (it.next()) |entry| {
@ -2065,6 +2093,9 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
var c_obj_prog_node = main_progress_node.start("Compile C Objects", self.c_source_files.len);
defer c_obj_prog_node.end();
var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", self.embed_file_work_queue.count);
defer embed_file_prog_node.end();
self.work_queue_wait_group.reset();
defer self.work_queue_wait_group.wait();
@ -2079,6 +2110,13 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
});
}
while (self.embed_file_work_queue.readItem()) |embed_file| {
self.astgen_wait_group.start();
try self.thread_pool.spawn(workerCheckEmbedFile, .{
self, embed_file, &embed_file_prog_node, &self.astgen_wait_group,
});
}
while (self.c_object_work_queue.readItem()) |c_object| {
self.work_queue_wait_group.start();
try self.thread_pool.spawn(workerUpdateCObject, .{
@ -2260,6 +2298,15 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
error.AnalysisFail => continue,
};
},
.update_embed_file => |embed_file| {
if (build_options.omit_stage2)
@panic("sadly stage2 is omitted from this build to save memory on the CI server");
const module = self.bin_file.options.module.?;
module.updateEmbedFile(embed_file) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.AnalysisFail => continue,
};
},
.update_line_number => |decl| {
if (build_options.omit_stage2)
@panic("sadly stage2 is omitted from this build to save memory on the CI server");
@ -2542,6 +2589,29 @@ fn workerAstGenFile(
}
}
/// Thread-pool worker: checks a single `@embedFile`'d file for on-disk
/// changes by delegating to `Module.detectEmbedFileUpdate`. Any failure is
/// recorded as a retryable error message on the embed file rather than
/// propagated, since workers cannot return errors.
fn workerCheckEmbedFile(
    comp: *Compilation,
    embed_file: *Module.EmbedFile,
    prog_node: *std.Progress.Node,
    wg: *WaitGroup,
) void {
    defer wg.finish();

    var sub_node = prog_node.start(embed_file.sub_file_path, 0);
    sub_node.activate();
    defer sub_node.end();

    const module = comp.bin_file.options.module.?;
    if (module.detectEmbedFileUpdate(embed_file)) |_| {
        // Success: either the file is unchanged or an update job was queued.
    } else |err| {
        comp.reportRetryableEmbedFileError(embed_file, err) catch |oom| switch (oom) {
            // Swallowing this error is OK because it's implied to be OOM when
            // there is a missing `failed_embed_files` error message.
            error.OutOfMemory => {},
        };
    }
}
pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest {
var man = comp.cache_parent.obtain();
@ -2790,6 +2860,36 @@ fn reportRetryableAstGenError(
}
}
/// Records `err` as a retryable error message against `embed_file` in
/// `failed_embed_files`, attributed to the source location of the Decl that
/// owns the embed. Takes the Compilation mutex while mutating the table.
/// The only error this can itself produce is OOM while building the message.
fn reportRetryableEmbedFileError(
    comp: *Compilation,
    embed_file: *Module.EmbedFile,
    err: anyerror,
) error{OutOfMemory}!void {
    const mod = comp.bin_file.options.module.?;
    const gpa = mod.gpa;
    const src_loc: Module.SrcLoc = embed_file.owner_decl.srcLoc();

    // Include the package directory in the message when it is known.
    const err_msg = blk: {
        if (embed_file.pkg.root_src_directory.path) |dir_path| {
            break :blk try Module.ErrorMsg.create(
                gpa,
                src_loc,
                "unable to load '{s}" ++ std.fs.path.sep_str ++ "{s}': {s}",
                .{ dir_path, embed_file.sub_file_path, @errorName(err) },
            );
        }
        break :blk try Module.ErrorMsg.create(gpa, src_loc, "unable to load '{s}': {s}", .{
            embed_file.sub_file_path, @errorName(err),
        });
    };
    errdefer err_msg.destroy(gpa);

    {
        const lock = comp.mutex.acquire();
        defer lock.release();
        try mod.failed_embed_files.putNoClobber(gpa, embed_file, err_msg);
    }
}
fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.Progress.Node) !void {
if (!build_options.have_llvm) {
return comp.failCObj(c_object, "clang not available: compiler built without LLVM extensions", .{});

View File

@ -55,11 +55,17 @@ decl_exports: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{},
/// is performing the export of another Decl.
/// This table owns the Export memory.
export_owners: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{},
/// The set of all the files in the Module. We keep track of this in order to iterate
/// over it and check which source files have been modified on the file system when
/// The set of all the Zig source files in the Module. We keep track of this in order
/// to iterate over it and check which source files have been modified on the file system when
/// an update is requested, as well as to cache `@import` results.
/// Keys are fully resolved file paths. This table owns the keys and values.
import_table: std.StringArrayHashMapUnmanaged(*File) = .{},
/// The set of all the files which have been loaded with `@embedFile` in the Module.
/// We keep track of this in order to iterate over it and check which files have been
/// modified on the file system when an update is requested, as well as to cache
/// `@embedFile` results.
/// Keys are fully resolved file paths. This table owns the keys and values.
embed_table: std.StringHashMapUnmanaged(*EmbedFile) = .{},
/// The set of all the generic function instantiations. This is used so that when a generic
/// function is called twice with the same comptime parameter arguments, both calls dispatch
@ -87,6 +93,8 @@ compile_log_decls: std.AutoArrayHashMapUnmanaged(*Decl, i32) = .{},
/// Using a map here for consistency with the other fields here.
/// The ErrorMsg memory is owned by the `File`, using Module's general purpose allocator.
failed_files: std.AutoArrayHashMapUnmanaged(*File, ?*ErrorMsg) = .{},
/// The ErrorMsg memory is owned by the `EmbedFile`, using Module's general purpose allocator.
failed_embed_files: std.AutoArrayHashMapUnmanaged(*EmbedFile, *ErrorMsg) = .{},
/// Using a map here for consistency with the other fields here.
/// The ErrorMsg memory is owned by the `Export`, using Module's general purpose allocator.
failed_exports: std.AutoArrayHashMapUnmanaged(*Export, *ErrorMsg) = .{},
@ -1534,6 +1542,23 @@ pub const File = struct {
}
};
/// Represents the contents of a file loaded with `@embedFile`.
/// Instances live in `Module.embed_table`, keyed by resolved file path.
pub const EmbedFile = struct {
    /// Relative to the owning package's root_src_dir.
    /// Memory is stored in gpa, owned by EmbedFile.
    sub_file_path: []const u8,
    /// NUL-terminated file contents. Reallocated (and the old buffer freed)
    /// by `detectEmbedFileUpdate` when the file changes on disk.
    bytes: [:0]const u8,
    // The stat_* fields snapshot the file metadata observed at load time so
    // that `detectEmbedFileUpdate` can cheaply decide whether to re-read.
    stat_size: u64,
    stat_inode: std.fs.File.INode,
    stat_mtime: i128,
    /// Package that this file is a part of, managed externally.
    pkg: *Package,
    /// The Decl that was created from the `@embedFile` to own this resource.
    /// This is how zig knows what other Decl objects to invalidate if the file
    /// changes on disk.
    /// NOTE: set by Sema (`zirEmbedFile`) right after `embedFile` returns;
    /// it is `undefined` until then.
    owner_decl: *Decl,
};
/// This struct holds data necessary to construct API-facing `AllErrors.Message`.
/// Its memory is managed with the general purpose allocator so that they
/// can be created and destroyed in response to incremental updates.
@ -2364,6 +2389,11 @@ pub fn deinit(mod: *Module) void {
}
mod.failed_files.deinit(gpa);
for (mod.failed_embed_files.values()) |msg| {
msg.destroy(gpa);
}
mod.failed_embed_files.deinit(gpa);
for (mod.failed_exports.values()) |value| {
value.destroy(gpa);
}
@ -3060,6 +3090,32 @@ pub fn ensureDeclAnalyzed(mod: *Module, decl: *Decl) SemaError!void {
}
}
/// Handles the `update_embed_file` work-queue job: the file backing an
/// `@embedFile` has been re-read from disk (see `detectEmbedFileUpdate`),
/// so every Decl depending on the embed's owner Decl must be re-analyzed.
pub fn updateEmbedFile(mod: *Module, embed_file: *EmbedFile) SemaError!void {
    const tracy = trace(@src());
    defer tracy.end();

    // TODO we can potentially relax this if we store some more information along
    // with decl dependency edges
    for (embed_file.owner_decl.dependants.keys()) |dep| {
        switch (dep.analysis) {
            .unreferenced => unreachable,
            .in_progress => continue, // already doing analysis, ok
            .outdated => continue, // already queued for update

            .file_failure,
            .dependency_failure,
            .sema_failure,
            .sema_failure_retryable,
            .codegen_failure,
            .codegen_failure_retryable,
            .complete,
            // NOTE(review): only deps whose generation differs from the
            // current one are marked — presumably a dep already analyzed in
            // this generation has seen the new contents; confirm against
            // `markOutdatedDecl` semantics.
            => if (dep.generation != mod.generation) {
                try mod.markOutdatedDecl(dep);
            },
        }
    }
}
pub fn semaPkg(mod: *Module, pkg: *Package) !void {
const file = (try mod.importPkg(pkg)).file;
return mod.semaFile(file);
@ -3551,6 +3607,84 @@ pub fn importFile(
};
}
/// Returns the cached `EmbedFile` for `rel_file_path` (interpreted relative
/// to `cur_file`), loading the contents from disk on first use. The resolved
/// path is the cache key so different relative spellings of the same file
/// share one entry. Returns `error.ImportOutsidePkgPath` when the resolved
/// path escapes the package root; file-system errors from open/stat/read
/// propagate to the caller.
pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*EmbedFile {
    const gpa = mod.gpa;

    // The resolved path is used as the key in the table, to detect if
    // a file refers to the same as another, despite different relative paths.
    const cur_pkg_dir_path = cur_file.pkg.root_src_directory.path orelse ".";
    const resolved_path = try std.fs.path.resolve(gpa, &[_][]const u8{
        cur_pkg_dir_path, cur_file.sub_file_path, "..", rel_file_path,
    });
    var keep_resolved_path = false;
    defer if (!keep_resolved_path) gpa.free(resolved_path);

    const gop = try mod.embed_table.getOrPut(gpa, resolved_path);
    // BUGFIX: if any error occurs after the key has been inserted (e.g.
    // `openFile` fails with FileNotFound), the table must not keep an entry
    // whose value is still undefined — a later `@embedFile` of the same path
    // would hit `found_existing` and dereference garbage. Remove the entry
    // and let the `defer` above reclaim the key.
    errdefer {
        std.debug.assert(mod.embed_table.remove(resolved_path));
        keep_resolved_path = false;
    }
    if (gop.found_existing) return gop.value_ptr.*;
    keep_resolved_path = true; // It's now owned by embed_table.

    const new_file = try gpa.create(EmbedFile);
    errdefer gpa.destroy(new_file);

    // Reject files outside the package root.
    const resolved_root_path = try std.fs.path.resolve(gpa, &[_][]const u8{cur_pkg_dir_path});
    defer gpa.free(resolved_root_path);
    if (!mem.startsWith(u8, resolved_path, resolved_root_path)) {
        return error.ImportOutsidePkgPath;
    }
    // +1 for the directory separator here.
    const sub_file_path = try gpa.dupe(u8, resolved_path[resolved_root_path.len + 1 ..]);
    errdefer gpa.free(sub_file_path);

    var file = try cur_file.pkg.root_src_directory.handle.openFile(sub_file_path, .{});
    defer file.close();

    const stat = try file.stat();
    // Alignment 1, sentinel 0: produces the NUL-terminated `bytes` slice.
    const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), stat.size, 1, 0);

    log.debug("new embedFile. resolved_root_path={s}, resolved_path={s}, sub_file_path={s}, rel_file_path={s}", .{
        resolved_root_path, resolved_path, sub_file_path, rel_file_path,
    });

    gop.value_ptr.* = new_file;
    new_file.* = .{
        .sub_file_path = sub_file_path,
        .bytes = bytes,
        .stat_size = stat.size,
        .stat_inode = stat.inode,
        .stat_mtime = stat.mtime,
        .pkg = cur_file.pkg,
        .owner_decl = undefined, // Set by Sema immediately after this function returns.
    };
    return new_file;
}
/// Stats the file backing `embed_file`; if size/mtime/inode differ from the
/// recorded snapshot, re-reads the contents (freeing the old buffer), updates
/// the snapshot, and queues an `update_embed_file` job under the Compilation
/// mutex. File-system errors propagate to the caller.
pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void {
    var file = try embed_file.pkg.root_src_directory.handle.openFile(embed_file.sub_file_path, .{});
    defer file.close();

    const new_stat = try file.stat();

    const changed = new_stat.size != embed_file.stat_size or
        new_stat.mtime != embed_file.stat_mtime or
        new_stat.inode != embed_file.stat_inode;
    if (!changed) return;

    const gpa = mod.gpa;
    // Alignment 1, sentinel 0: matches the [:0]const u8 type of `bytes`.
    const new_bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), new_stat.size, 1, 0);
    gpa.free(embed_file.bytes);
    embed_file.bytes = new_bytes;
    embed_file.stat_size = new_stat.size;
    embed_file.stat_mtime = new_stat.mtime;
    embed_file.stat_inode = new_stat.inode;

    const lock = mod.comp.mutex.acquire();
    defer lock.release();
    try mod.comp.work_queue.writeItem(.{ .update_embed_file = embed_file });
}
pub fn scanNamespace(
mod: *Module,
namespace: *Namespace,

View File

@ -6467,6 +6467,45 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return sema.addConstant(file_root_decl.ty, file_root_decl.val);
}
/// Implements the `@embedFile` builtin: resolves the operand to a comptime
/// string, loads (or re-uses) the file via `Module.embedFile`, and materializes
/// the contents as an anonymous Decl of type `[N:0]u8`, returning a reference
/// to that Decl.
fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
    const tracy = trace(@src());
    defer tracy.end();

    const mod = sema.mod;
    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
    const src = inst_data.src();
    const name = try sema.resolveConstString(block, src, inst_data.operand);

    const embed_file = mod.embedFile(block.getFileScope(), name) catch |err| switch (err) {
        error.ImportOutsidePkgPath => {
            return sema.fail(block, src, "embed of file outside package path: '{s}'", .{name});
        },
        // BUGFIX: OOM is not a property of the user's code; it must propagate
        // as error.OutOfMemory instead of being recorded as a (cached)
        // "unable to open" compile error.
        error.OutOfMemory => return error.OutOfMemory,
        else => {
            // TODO: these errors are file system errors; make sure an update() will
            // retry this and not cache the file system error, which may be transient.
            return sema.fail(block, src, "unable to open '{s}': {s}", .{ name, @errorName(err) });
        },
    };

    var anon_decl = try block.startAnonDecl();
    defer anon_decl.deinit();

    // `bytes` is [:0]const u8, so indexing one past len reads the sentinel.
    const bytes_including_null = embed_file.bytes[0 .. embed_file.bytes.len + 1];

    // TODO instead of using `Value.Tag.bytes`, create a new value tag for pointing at
    // a `*Module.EmbedFile`. The purpose of this would be:
    // - If only the length is read and the bytes are not inspected by comptime code,
    //   there can be an optimization where the codegen backend does a copy_file_range
    //   into the final binary, and never loads the data into memory.
    // - When a Decl is destroyed, it can free the `*Module.EmbedFile`.
    embed_file.owner_decl = try anon_decl.finish(
        try Type.Tag.array_u8_sentinel_0.create(anon_decl.arena(), embed_file.bytes.len),
        try Value.Tag.bytes.create(anon_decl.arena(), bytes_including_null),
    );

    return sema.analyzeDeclRef(embed_file.owner_decl);
}
fn zirRetErrValueCode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
_ = block;
_ = inst;
@ -9020,12 +9059,6 @@ fn zirBoolToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
return block.addUnOp(.bool_to_int, operand);
}
// NOTE(review): this is the pre-existing placeholder shown as *removed* by
// this diff; it is superseded by the full `zirEmbedFile` implementation
// added earlier in the same change.
fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
    const inst_data = sema.code.instructions.items(.data)[inst].un_node;
    const src = inst_data.src();
    return sema.fail(block, src, "TODO: Sema.zirEmbedFile", .{});
}
fn zirErrorName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
const src = inst_data.src();