Prevent analysis of functions only referenced at comptime

The idea here is that there are two ways we can reference a function at runtime:

* Through a direct call, i.e. where the function is comptime-known
* Through a function pointer

This means we can easily perform a rudimentary form of escape analysis
on functions. If we ever see a `decl_ref` or `ref` of a function, we
have a function pointer, which could "leak" into runtime code, so we
emit the function; but for a plain `decl_val`, there is no need to
emit it.
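
For illustration only (this snippet is not part of the diff; the names
`f` and `ptr` are hypothetical), a minimal Zig sketch of the reference
forms and how they are treated under this scheme:

```zig
fn f() void {}

pub fn main() void {
    // Direct call: `f` is comptime-known at the call site, so its body
    // is analyzed because the call itself can reach runtime.
    f();

    // `ref`/`decl_ref`: taking a pointer means `f` could "leak" into
    // runtime code, so the function body must be emitted.
    const ptr = &f;
    ptr();
}

comptime {
    // Plain `decl_val`: no pointer is taken, so this reference alone no
    // longer causes `f` to be emitted.
    _ = f;
}
```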

This change means that `comptime { _ = f; }` no longer forces a function
to be emitted, a pattern that was relied upon in a few places (mainly
tests). Those use sites have been replaced with `_ = &f;`, which still
triggers analysis of the function body, since taking a pointer to the
function is a potential runtime reference.
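
As a concrete sketch of the updated pattern (the test and the `helper`
function here are hypothetical, not taken from the diff):

```zig
fn helper() void {}

test "reference helper so its body gets analyzed" {
    // `_ = helper;` is now a plain decl_val and no longer forces emission;
    // taking the address still queues analysis of the function body.
    _ = &helper;
}
```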

Resolves: #6256
Resolves: #15353
mlugg 2023-05-29 05:07:17 +01:00
parent b5fad3a40a
commit 4976b58ab1
33 changed files with 149 additions and 65 deletions

View File

@ -12,7 +12,7 @@ pub const panic = @import("common.zig").panic;
// specified range.
comptime {
_ = clear_cache;
_ = &clear_cache;
}
fn clear_cache(start: usize, end: usize) callconv(.C) void {

View File

@ -1959,7 +1959,7 @@ pub const parseFloat = @import("fmt/parse_float.zig").parseFloat;
pub const ParseFloatError = @import("fmt/parse_float.zig").ParseFloatError;
test {
_ = parseFloat;
_ = &parseFloat;
}
pub fn charToDigit(c: u8, radix: u8) (error{InvalidCharacter}!u8) {

View File

@ -3150,12 +3150,12 @@ fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t, maybe_size: ?u64) CopyFileRawError
test {
if (builtin.os.tag != .wasi) {
_ = makeDirAbsolute;
_ = makeDirAbsoluteZ;
_ = copyFileAbsolute;
_ = updateFileAbsolute;
_ = &makeDirAbsolute;
_ = &makeDirAbsoluteZ;
_ = &copyFileAbsolute;
_ = &updateFileAbsolute;
}
_ = Dir.copyFile;
_ = &Dir.copyFile;
_ = @import("fs/test.zig");
_ = @import("fs/path.zig");
_ = @import("fs/file.zig");

View File

@ -1605,7 +1605,7 @@ pub fn HashMapUnmanaged(
comptime {
if (builtin.mode == .Debug) {
_ = dbHelper;
_ = &dbHelper;
}
}
};

View File

@ -532,8 +532,8 @@ pub fn MultiArrayList(comptime T: type) type {
comptime {
if (builtin.mode == .Debug) {
_ = dbHelper;
_ = Slice.dbHelper;
_ = &dbHelper;
_ = &Slice.dbHelper;
}
}
};

View File

@ -704,7 +704,7 @@ test "signalfd" {
.linux, .solaris => {},
else => return error.SkipZigTest,
}
_ = os.signalfd;
_ = &os.signalfd;
}
test "sync" {

View File

@ -1116,7 +1116,7 @@ pub fn checkAllAllocationFailures(backing_allocator: std.mem.Allocator, comptime
pub fn refAllDecls(comptime T: type) void {
if (!builtin.is_test) return;
inline for (comptime std.meta.declarations(T)) |decl| {
if (decl.is_pub) _ = @field(T, decl.name);
if (decl.is_pub) _ = &@field(T, decl.name);
}
}
@ -1132,7 +1132,7 @@ pub fn refAllDeclsRecursive(comptime T: type) void {
else => {},
}
}
_ = @field(T, decl.name);
_ = &@field(T, decl.name);
}
}
}

View File

@ -3193,6 +3193,13 @@ fn processOneJob(comp: *Compilation, job: Job, prog_node: *std.Progress.Node) !v
error.OutOfMemory => return error.OutOfMemory,
error.AnalysisFail => return,
};
const decl = module.declPtr(decl_index);
if (decl.kind == .@"test" and comp.bin_file.options.is_test) {
// Tests are always emitted in test binaries. The decl_refs are created by
// Module.populateTestFunctions, but this will not queue body analysis, so do
// that now.
try module.ensureFuncBodyAnalysisQueued(decl.val.castTag(.function).?.data);
}
},
.update_embed_file => |embed_file| {
const named_frame = tracy.namedFrame("update_embed_file");

View File

@ -1638,6 +1638,10 @@ pub const Fn = struct {
inferred_error_sets: InferredErrorSetList = .{},
pub const Analysis = enum {
/// This function has not yet undergone analysis, because we have not
/// seen a potential runtime call. It may be analyzed in future.
none,
/// Analysis for this function has been queued, but not yet completed.
queued,
/// This function intentionally only has ZIR generated because it is marked
/// inline, which means no runtime version of the function will be generated.
@ -4323,7 +4327,7 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void {
.complete, .codegen_failure_retryable => {
switch (func.state) {
.sema_failure, .dependency_failure => return error.AnalysisFail,
.queued => {},
.none, .queued => {},
.in_progress => unreachable,
.inline_only => unreachable, // don't queue work for this
.success => return,
@ -4426,6 +4430,60 @@ pub fn ensureFuncBodyAnalyzed(mod: *Module, func: *Fn) SemaError!void {
}
}
/// Ensure this function's body is or will be analyzed and emitted. This should
/// be called whenever a potential runtime call of a function is seen.
///
/// The caller is responsible for ensuring the function decl itself is already
/// analyzed, and for ensuring it can exist at runtime (see
/// `sema.fnHasRuntimeBits`). This function does *not* guarantee that the body
/// will be analyzed when it returns: for that, see `ensureFuncBodyAnalyzed`.
pub fn ensureFuncBodyAnalysisQueued(mod: *Module, func: *Fn) !void {
const decl_index = func.owner_decl;
const decl = mod.declPtr(decl_index);
switch (decl.analysis) {
.unreferenced => unreachable,
.in_progress => unreachable,
.outdated => unreachable,
.file_failure,
.sema_failure,
.liveness_failure,
.codegen_failure,
.dependency_failure,
.sema_failure_retryable,
.codegen_failure_retryable,
// The function analysis failed, but we've already emitted an error for
// that. The callee doesn't need the function to be analyzed right now,
// so its analysis can safely continue.
=> return,
.complete => {},
}
assert(decl.has_tv);
switch (func.state) {
.none => {},
.queued => return,
// As above, we don't need to forward errors here.
.sema_failure, .dependency_failure => return,
.in_progress => return,
.inline_only => unreachable, // don't queue work for this
.success => return,
}
// Decl itself is safely analyzed, and body analysis is not yet queued
try mod.comp.work_queue.writeItem(.{ .codegen_func = func });
if (mod.emit_h != null) {
// TODO: we ideally only want to do this if the function's type changed
// since the last update
try mod.comp.work_queue.writeItem(.{ .emit_h_decl = decl_index });
}
func.state = .queued;
}
pub fn updateEmbedFile(mod: *Module, embed_file: *EmbedFile) SemaError!void {
const tracy = trace(@src());
defer tracy.end();
@ -4733,20 +4791,6 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool {
decl.analysis = .complete;
decl.generation = mod.generation;
const has_runtime_bits = try sema.fnHasRuntimeBits(decl.ty);
if (has_runtime_bits) {
// We don't fully codegen the decl until later, but we do need to reserve a global
// offset table index for it. This allows us to codegen decls out of dependency
// order, increasing how many computations can be done in parallel.
try mod.comp.work_queue.writeItem(.{ .codegen_func = func });
if (type_changed and mod.emit_h != null) {
try mod.comp.work_queue.writeItem(.{ .emit_h_decl = decl_index });
}
} else if (!prev_is_inline and prev_type_has_bits) {
mod.comp.bin_file.freeDecl(decl_index);
}
const is_inline = decl.ty.fnCallingConvention() == .Inline;
if (decl.is_exported) {
const export_src: LazySrcLoc = .{ .token_offset = @boolToInt(decl.is_pub) };

View File

@ -2452,6 +2452,7 @@ fn zirCoerceResultPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileE
.@"align" = iac.data.alignment,
.@"addrspace" = addr_space,
});
try sema.maybeQueueFuncBodyAnalysis(iac.data.decl_index);
return sema.addConstant(
ptr_ty,
try Value.Tag.decl_ref_mut.create(sema.arena, .{
@ -3709,6 +3710,7 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com
const final_ptr_ty_inst = try sema.addType(final_ptr_ty);
sema.air_instructions.items(.data)[ptr_inst].ty_pl.ty = final_ptr_ty_inst;
try sema.maybeQueueFuncBodyAnalysis(decl_index);
if (var_is_mut) {
sema.air_values.items[value_index] = try Value.Tag.decl_ref_mut.create(sema.arena, .{
.decl_index = decl_index,
@ -3809,6 +3811,7 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com
// Even though we reuse the constant instruction, we still remove it from the
// block so that codegen does not see it.
block.instructions.shrinkRetainingCapacity(search_index);
try sema.maybeQueueFuncBodyAnalysis(new_decl_index);
sema.air_values.items[value_index] = try Value.Tag.decl_ref.create(sema.arena, new_decl_index);
// if bitcast ty ref needs to be made const, make_ptr_const
// ZIR handles it later, so we can just use the ty ref here.
@ -5747,6 +5750,7 @@ pub fn analyzeExport(
// This decl is alive no matter what, since it's being exported
mod.markDeclAlive(exported_decl);
try sema.maybeQueueFuncBodyAnalysis(exported_decl_index);
const gpa = mod.gpa;
@ -7068,6 +7072,12 @@ fn analyzeCall(
sema.owner_func.?.calls_or_awaits_errorable_fn = true;
}
if (try sema.resolveMaybeUndefVal(func)) |func_val| {
if (func_val.castTag(.function)) |func_obj| {
try sema.mod.ensureFuncBodyAnalysisQueued(func_obj.data);
}
}
try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Call).Struct.fields.len +
args.len);
const func_inst = try block.addInst(.{
@ -7585,6 +7595,8 @@ fn instantiateGenericCall(
sema.owner_func.?.calls_or_awaits_errorable_fn = true;
}
try sema.mod.ensureFuncBodyAnalysisQueued(callee);
try sema.air_extra.ensureUnusedCapacity(sema.gpa, @typeInfo(Air.Call).Struct.fields.len +
runtime_args_len);
const result = try block.addInst(.{
@ -9143,7 +9155,7 @@ fn funcCommon(
}
const is_inline = fn_ty.fnCallingConvention() == .Inline;
const anal_state: Module.Fn.Analysis = if (is_inline) .inline_only else .queued;
const anal_state: Module.Fn.Analysis = if (is_inline) .inline_only else .none;
const comptime_args: ?[*]TypedValue = if (sema.comptime_args_fn_inst == func_inst) blk: {
break :blk if (sema.comptime_args.len == 0) null else sema.comptime_args.ptr;
@ -24279,9 +24291,7 @@ fn fieldCallBind(
if (concrete_ty.getNamespace()) |namespace| {
if (try sema.namespaceLookup(block, src, namespace, field_name)) |decl_idx| {
try sema.addReferencedBy(block, src, decl_idx);
const inst = try sema.analyzeDeclRef(decl_idx);
const decl_val = try sema.analyzeLoad(block, src, inst, src);
const decl_val = try sema.analyzeDeclVal(block, src, decl_idx);
const decl_type = sema.typeOf(decl_val);
if (decl_type.zigTypeTag() == .Fn and
decl_type.fnParamLen() >= 1)
@ -28911,7 +28921,7 @@ fn analyzeDeclVal(
if (sema.decl_val_table.get(decl_index)) |result| {
return result;
}
const decl_ref = try sema.analyzeDeclRef(decl_index);
const decl_ref = try sema.analyzeDeclRefInner(decl_index, false);
const result = try sema.analyzeLoad(block, src, decl_ref, src);
if (Air.refToIndex(result)) |index| {
if (sema.air_instructions.items(.tag)[index] == .constant and !block.is_typeof) {
@ -28970,6 +28980,7 @@ fn refValue(sema: *Sema, block: *Block, ty: Type, val: Value) !Value {
try val.copy(anon_decl.arena()),
0, // default alignment
);
try sema.maybeQueueFuncBodyAnalysis(decl);
try sema.mod.declareDeclDependency(sema.owner_decl_index, decl);
return try Value.Tag.decl_ref.create(sema.arena, decl);
}
@ -28982,6 +28993,14 @@ fn optRefValue(sema: *Sema, block: *Block, ty: Type, opt_val: ?Value) !Value {
}
fn analyzeDeclRef(sema: *Sema, decl_index: Decl.Index) CompileError!Air.Inst.Ref {
return sema.analyzeDeclRefInner(decl_index, true);
}
/// Analyze a reference to the decl at the given index. Ensures the underlying decl is analyzed, but
/// only triggers analysis for function bodies if `analyze_fn_body` is true. If it's possible for a
/// decl_ref to end up in runtime code, the function body must be analyzed: `analyzeDeclRef` wraps
/// this function with `analyze_fn_body` set to true.
fn analyzeDeclRefInner(sema: *Sema, decl_index: Decl.Index, analyze_fn_body: bool) CompileError!Air.Inst.Ref {
try sema.mod.declareDeclDependency(sema.owner_decl_index, decl_index);
try sema.ensureDeclAnalyzed(decl_index);
@ -28997,6 +29016,9 @@ fn analyzeDeclRef(sema: *Sema, decl_index: Decl.Index) CompileError!Air.Inst.Ref
});
return sema.addConstant(ty, try Value.Tag.decl_ref.create(sema.arena, decl_index));
}
if (analyze_fn_body) {
try sema.maybeQueueFuncBodyAnalysis(decl_index);
}
return sema.addConstant(
try Type.ptr(sema.arena, sema.mod, .{
.pointee_type = decl_tv.ty,
@ -29008,6 +29030,15 @@ fn analyzeDeclRef(sema: *Sema, decl_index: Decl.Index) CompileError!Air.Inst.Ref
);
}
fn maybeQueueFuncBodyAnalysis(sema: *Sema, decl_index: Decl.Index) !void {
const decl = sema.mod.declPtr(decl_index);
const tv = try decl.typedValue();
if (tv.ty.zigTypeTag() != .Fn) return;
if (!try sema.fnHasRuntimeBits(tv.ty)) return;
const func = tv.val.castTag(.function) orelse return; // undef or extern_fn
try sema.mod.ensureFuncBodyAnalysisQueued(func.data);
}
fn analyzeRef(
sema: *Sema,
block: *Block,

View File

@ -6802,7 +6802,7 @@ pub const Type = extern union {
comptime {
if (builtin.mode == .Debug) {
_ = dbHelper;
_ = &dbHelper;
}
}
};

View File

@ -5709,7 +5709,7 @@ pub const Value = extern union {
comptime {
if (builtin.mode == .Debug) {
_ = dbHelper;
_ = &dbHelper;
}
}
};

View File

@ -48,7 +48,7 @@ fn fn1(alpha: bool) void {
}
test "lazy @sizeOf result is checked for definedness" {
_ = fn1;
_ = &fn1;
}
const A = struct {

View File

@ -3,7 +3,7 @@ pub inline fn instanceRequestAdapter() void {}
pub inline fn requestAdapter(
comptime callbackArg: fn () callconv(.Inline) void,
) void {
_ = (struct {
_ = &(struct {
pub fn callback() callconv(.C) void {
callbackArg();
}

View File

@ -1,5 +1,5 @@
const Bar = union(enum(u32)) {
X: i32 = 1
X: i32 = 1,
};
fn testCompileLog(x: Bar) void {
@ -7,7 +7,8 @@ fn testCompileLog(x: Bar) void {
}
pub export fn entry() void {
comptime testCompileLog(Bar{.X = 123});
comptime testCompileLog(Bar{ .X = 123 });
_ = &testCompileLog;
}
// error

View File

@ -1,10 +1,11 @@
export fn foo() void {
comptime bar(12, "hi",);
comptime bar(12, "hi");
_ = &bar;
}
fn bar(a: i32, b: []const u8) void {
@compileLog("begin",);
@compileLog("begin");
@compileLog("a", a, "b", b);
@compileLog("end",);
@compileLog("end");
}
export fn baz() void {
const S = struct { a: u32 };
@ -15,8 +16,8 @@ export fn baz() void {
// backend=llvm
// target=native
//
// :5:5: error: found compile log statement
// :11:5: note: also here
// :6:5: error: found compile log statement
// :12:5: note: also here
//
// Compile Log Output:
// @as(*const [5:0]u8, "begin")

View File

@ -2,7 +2,7 @@ fn entry(x: []i32) i32 {
return x.*;
}
comptime {
_ = entry;
_ = &entry;
}
// error

View File

@ -4,9 +4,9 @@ fn f() i32 {
}
pub extern fn entry1(b: u32, comptime a: [2]u8, c: i32) void;
pub extern fn entry2(b: u32, noalias a: anytype, i43) void;
comptime { _ = f; }
comptime { _ = entry1; }
comptime { _ = entry2; }
comptime { _ = &f; }
comptime { _ = &entry1; }
comptime { _ = &entry2; }
// error
// backend=stage2

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) i32) *i32 {
return a;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// error

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) i32) *i32 {
return &a.*;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// error

View File

@ -2,10 +2,10 @@ fn f(noalias x: i32) void { _ = x; }
export fn entry() void { f(1234); }
fn generic(comptime T: type, noalias _: [*]T, noalias _: [*]const T, _: usize) void {}
comptime { _ = generic; }
comptime { _ = &generic; }
fn slice(noalias _: []u8) void {}
comptime { _ = slice; }
comptime { _ = &slice; }
// error
// backend=stage2

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) i32) *addrspace(.fs) i32 {
return a;
}
export fn entry2() void {
_ = entry;
_ = &entry;
}
// error

View File

@ -2,7 +2,7 @@ fn entry(a: ?*addrspace(.gs) i32) *i32 {
return a.?;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// error

View File

@ -2,7 +2,7 @@ fn foo() [:0]u8 {
var x: []u8 = undefined;
return x;
}
comptime { _ = foo; }
comptime { _ = &foo; }
// error
// backend=stage2

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) ?[1]i32) *addrspace(.gs) i32 {
return &a.*.?[0];
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) [1]i32) *addrspace(.gs) i32 {
return &a[0];
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -3,7 +3,7 @@ fn entry(a: *addrspace(.gs) [1]A) *addrspace(.gs) i32 {
return &a[0].a.?[0];
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -3,7 +3,7 @@ fn entry(a: *addrspace(.gs) A) *addrspace(.gs) i32 {
return &a.a;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.fs) *addrspace(.gs) *i32) *i32 {
return a.*.*;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) i32) *addrspace(.gs) i32 {
return a;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.gs) i32) *addrspace(.gs) i32 {
return &a.*;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -2,7 +2,7 @@ fn entry(a: *addrspace(.generic) i32) *i32 {
return a;
}
pub fn main() void {
_ = entry;
_ = &entry;
}
// compile

View File

@ -26,10 +26,10 @@ fn add(b: *std.Build, test_step: *std.Build.Step, optimize: std.builtin.Optimize
const check_lib = lib.checkObject();
check_lib.checkStart("Section type");
// only 3 entries, although we have more functions.
// only 2 entries, although we have more functions.
// This is to test functions with the same function signature
// have their types deduplicated.
check_lib.checkNext("entries 3");
check_lib.checkNext("entries 2");
check_lib.checkNext("params 1");
check_lib.checkNext("type i32");
check_lib.checkNext("returns 1");