From df52073681bdb332bef6b8a17576d80ab277c947 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 19 Aug 2024 16:29:45 -0700 Subject: [PATCH 1/9] llvm.Builder: add !nosanitize API see #20992 Co-authored-by: Jacob Young --- src/codegen/llvm/Builder.zig | 42 +++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig index 50b43319da..910ae21d3c 100644 --- a/src/codegen/llvm/Builder.zig +++ b/src/codegen/llvm/Builder.zig @@ -3994,6 +3994,7 @@ pub const Function = struct { names: [*]const String = &[0]String{}, value_indices: [*]const u32 = &[0]u32{}, strip: bool, + any_nosanitize: bool, debug_locations: std.AutoHashMapUnmanaged(Instruction.Index, DebugLocation) = .{}, debug_values: []const Instruction.Index = &.{}, extra: []const u32 = &.{}, @@ -4090,6 +4091,7 @@ pub const Function = struct { block, br, br_cond, + br_cond_nosanitize, call, @"call fast", cmpxchg, @@ -4345,6 +4347,13 @@ pub const Function = struct { else => unreachable, }; } + + pub fn isNosanitize(self: Tag) bool { + return switch (self) { + .br_cond_nosanitize => true, + else => false, + }; + } }; pub const Index = enum(u32) { @@ -4367,6 +4376,7 @@ pub const Function = struct { return switch (wip.instructions.items(.tag)[@intFromEnum(self)]) { .br, .br_cond, + .br_cond_nosanitize, .ret, .@"ret void", .@"switch", @@ -4380,6 +4390,7 @@ pub const Function = struct { return switch (wip.instructions.items(.tag)[@intFromEnum(self)]) { .br, .br_cond, + .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -4470,6 +4481,7 @@ pub const Function = struct { .block => .label, .br, .br_cond, + .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -4656,6 +4668,7 @@ pub const Function = struct { .block => .label, .br, .br_cond, + .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -5100,6 +5113,7 @@ pub const WipFunction = struct { instructions: std.MultiArrayList(Instruction), names: std.ArrayListUnmanaged(String), strip: bool, + any_nosanitize: bool, debug_locations: std.AutoArrayHashMapUnmanaged(Instruction.Index, DebugLocation), debug_values: std.AutoArrayHashMapUnmanaged(Instruction.Index, void), extra: std.ArrayListUnmanaged(u32), @@ -5146,6 +5160,7 @@ pub const WipFunction = struct { .instructions = .{}, .names = .{}, .strip = options.strip, + .any_nosanitize = false, .debug_locations = .{}, .debug_values = .{}, .extra = .{}, @@ -6437,7 +6452,7 @@ pub const WipFunction = struct { .@"ret void", .@"unreachable", => {}, - .br_cond => { + .br_cond, .br_cond_nosanitize => { const extra = self.extraData(Instruction.BrCond, instruction.data); instruction.data = wip_extra.addExtra(Instruction.BrCond{ .cond = instructions.map(extra.cond), @@ -6624,6 +6639,7 @@ pub const WipFunction = struct { function.names = names.ptr; function.value_indices = value_indices.ptr; function.strip = self.strip; + function.any_nosanitize = self.any_nosanitize; function.debug_locations = debug_locations; function.debug_values = debug_values; } @@ -8537,6 +8553,7 @@ pub fn init(options: Options) Allocator.Error!Builder { try self.metadata_string_indices.append(self.gpa, 0); assert(try self.metadataString("") == .none); + assert(try self.debugTuple(&.{}) == .empty_tuple); return self; } @@ -8934,6 +8951,7 @@ pub fn addFunctionAssumeCapacity( .kind = .{ .function = function_index }, }), .strip = undefined, + .any_nosanitize = false, }); return function_index; } @@ -9581,6 +9599,12 @@ pub fn printUnbuffered( }); } if (function.instructions.len > 0) { + const 
maybe_empty_tuple: ?u32 = if (!function.any_nosanitize) null else b: { + const gop = try metadata_formatter.map.getOrPut(self.gpa, .{ + .metadata = .empty_tuple, + }); + break :b @intCast(gop.index); + }; var block_incoming_len: u32 = undefined; try writer.writeAll(" {\n"); var maybe_dbg_index: ?u32 = null; @@ -9770,6 +9794,14 @@ pub fn printUnbuffered( }), } }, + .br_cond_nosanitize => { + const extra = function.extraData(Function.Instruction.BrCond, instruction.data); + try writer.print(" br {%}, {%}, {%}", .{ + extra.cond.fmt(function_index, self), + extra.then.toInst(&function).fmt(function_index, self), + extra.@"else".toInst(&function).fmt(function_index, self), + }); + }, .call, .@"call fast", .@"musttail call", @@ -10046,8 +10078,12 @@ pub fn printUnbuffered( } if (maybe_dbg_index) |dbg_index| { - try writer.print(", !dbg !{}\n", .{dbg_index}); - } else try writer.writeByte('\n'); + try writer.print(", !dbg !{}", .{dbg_index}); + } + if (instruction.tag.isNosanitize()) { + try writer.print(", !nosanitize !{d}", .{maybe_empty_tuple.?}); + } + try writer.writeByte('\n'); } try writer.writeByte('}'); } From a3d622bdd67846cd6600619d41d022a4a08db00a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 23 Aug 2024 20:45:15 -0700 Subject: [PATCH 2/9] llvm.Builder: revert adding !nosanitize API It's not actually useful after all. --- src/codegen/llvm/Builder.zig | 37 +----------------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig index 910ae21d3c..029b81dc3f 100644 --- a/src/codegen/llvm/Builder.zig +++ b/src/codegen/llvm/Builder.zig @@ -3994,7 +3994,6 @@ pub const Function = struct { names: [*]const String = &[0]String{}, value_indices: [*]const u32 = &[0]u32{}, strip: bool, - any_nosanitize: bool, debug_locations: std.AutoHashMapUnmanaged(Instruction.Index, DebugLocation) = .{}, debug_values: []const Instruction.Index = &.{}, extra: []const u32 = &.{}, @@ -4091,7 +4090,6 @@ pub const Function = struct { block, br, br_cond, - br_cond_nosanitize, call, @"call fast", cmpxchg, @@ -4347,13 +4345,6 @@ pub const Function = struct { else => unreachable, }; } - - pub fn isNosanitize(self: Tag) bool { - return switch (self) { - .br_cond_nosanitize => true, - else => false, - }; - } }; pub const Index = enum(u32) { @@ -4376,7 +4367,6 @@ pub const Function = struct { return switch (wip.instructions.items(.tag)[@intFromEnum(self)]) { .br, .br_cond, - .br_cond_nosanitize, .ret, .@"ret void", .@"switch", @@ -4390,7 +4380,6 @@ pub const Function = struct { return switch (wip.instructions.items(.tag)[@intFromEnum(self)]) { .br, .br_cond, - .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -4481,7 +4470,6 @@ pub const Function = struct { .block => .label, .br, .br_cond, - .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -4668,7 +4656,6 @@ pub const Function = struct { .block => .label, .br, .br_cond, - .br_cond_nosanitize, .fence, .ret, .@"ret void", @@ -5113,7 +5100,6 @@ pub const WipFunction = struct { instructions: std.MultiArrayList(Instruction), names: std.ArrayListUnmanaged(String), strip: bool, - any_nosanitize: bool, debug_locations: std.AutoArrayHashMapUnmanaged(Instruction.Index, DebugLocation), debug_values: std.AutoArrayHashMapUnmanaged(Instruction.Index, void), extra: std.ArrayListUnmanaged(u32), @@ -5160,7 +5146,6 @@ pub const WipFunction = struct { .instructions = .{}, .names = .{}, .strip = options.strip, - .any_nosanitize = false, .debug_locations = .{}, .debug_values = .{}, .extra = .{}, 
@@ -6452,7 +6437,7 @@ pub const WipFunction = struct { .@"ret void", .@"unreachable", => {}, - .br_cond, .br_cond_nosanitize => { + .br_cond => { const extra = self.extraData(Instruction.BrCond, instruction.data); instruction.data = wip_extra.addExtra(Instruction.BrCond{ .cond = instructions.map(extra.cond), @@ -6639,7 +6624,6 @@ pub const WipFunction = struct { function.names = names.ptr; function.value_indices = value_indices.ptr; function.strip = self.strip; - function.any_nosanitize = self.any_nosanitize; function.debug_locations = debug_locations; function.debug_values = debug_values; } @@ -8553,7 +8537,6 @@ pub fn init(options: Options) Allocator.Error!Builder { try self.metadata_string_indices.append(self.gpa, 0); assert(try self.metadataString("") == .none); - assert(try self.debugTuple(&.{}) == .empty_tuple); return self; } @@ -8951,7 +8934,6 @@ pub fn addFunctionAssumeCapacity( .kind = .{ .function = function_index }, }), .strip = undefined, - .any_nosanitize = false, }); return function_index; } @@ -9599,12 +9581,6 @@ pub fn printUnbuffered( }); } if (function.instructions.len > 0) { - const maybe_empty_tuple: ?u32 = if (!function.any_nosanitize) null else b: { - const gop = try metadata_formatter.map.getOrPut(self.gpa, .{ - .metadata = .empty_tuple, - }); - break :b @intCast(gop.index); - }; var block_incoming_len: u32 = undefined; try writer.writeAll(" {\n"); var maybe_dbg_index: ?u32 = null; @@ -9794,14 +9770,6 @@ pub fn printUnbuffered( }), } }, - .br_cond_nosanitize => { - const extra = function.extraData(Function.Instruction.BrCond, instruction.data); - try writer.print(" br {%}, {%}, {%}", .{ - extra.cond.fmt(function_index, self), - extra.then.toInst(&function).fmt(function_index, self), - extra.@"else".toInst(&function).fmt(function_index, self), - }); - }, .call, .@"call fast", .@"musttail call", @@ -10080,9 +10048,6 @@ pub fn printUnbuffered( if (maybe_dbg_index) |dbg_index| { try writer.print(", !dbg !{}", .{dbg_index}); } - if (instruction.tag.isNosanitize()) { - try writer.print(", !nosanitize !{d}", .{maybe_empty_tuple.?}); - } try writer.writeByte('\n'); } try writer.writeByte('}'); From 43dc8db068f65f31cd7cf808429c627b29058582 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 27 Aug 2024 20:04:15 -0700 Subject: [PATCH 3/9] print_air: print cond_br branch hints --- src/print_air.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/print_air.zig b/src/print_air.zig index 8e6e21801c..49f7248589 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -791,7 +791,11 @@ const Writer = struct { try w.writeOperand(s, inst, 0, pl_op.operand); if (w.skip_body) return s.writeAll(", ..."); - try s.writeAll(", {\n"); + try s.writeAll(","); + if (extra.data.branch_hints.true != .none) { + try s.print(" {s}", .{@tagName(extra.data.branch_hints.true)}); + } + try s.writeAll(" {\n"); const old_indent = w.indent; w.indent += 2; @@ -806,7 +810,11 @@ const Writer = struct { try w.writeBody(s, then_body); try s.writeByteNTimes(' ', old_indent); - try s.writeAll("}, {\n"); + try s.writeAll("},"); + if (extra.data.branch_hints.false != .none) { + try s.print(" {s}", .{@tagName(extra.data.branch_hints.false)}); + } + try s.writeAll(" {\n"); if (liveness_condbr.else_deaths.len != 0) { try s.writeByteNTimes(' ', w.indent); From b8d99a332395ec0a1b9ed1a8e18f0db8db131b3c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 12:11:08 -0700 Subject: [PATCH 4/9] implement code coverage instrumentation manually instead of relying on the 
LLVM sancov pass. The LLVM pass is still executed if trace_pc_guard is requested, disabled otherwise. The LLVM backend emits the instrumentation directly. It uses `__sancov_pcs1` symbol name instead of `__sancov_pcs` because each element is 1 usize instead of 2. AIR: add CoveragePoint to branch hints which indicates whether those branches are interesting for code coverage purposes. Update libfuzzer to use the new instrumentation. It's simplified since we no longer need the constructor and the pcs are now in a continguous list. This is a regression in the fuzzing functionality because the instrumentation for comparisons is no longer emitted, resulting in worse fuzzer inputs generated. A future commit will add that instrumentation back. --- lib/fuzzer.zig | 92 +++++++++++++++--------------- src/Air.zig | 13 ++++- src/Sema.zig | 105 +++++++++++++++++++++++++--------- src/codegen/llvm.zig | 131 +++++++++++++++++++++++++++++++++++-------- src/print_air.zig | 6 ++ 5 files changed, 249 insertions(+), 98 deletions(-) diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 48b0b8d9ef..9c67756a6d 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -30,19 +30,6 @@ fn logOverride( export threadlocal var __sancov_lowest_stack: usize = std.math.maxInt(usize); -var module_count_8bc: usize = 0; -var module_count_pcs: usize = 0; - -export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void { - assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0); - fuzzer.pc_counters = start[0 .. end - start]; -} - -export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void { - assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0); - fuzzer.flagged_pcs = start[0 .. end - start]; -} - export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void { handleCmp(@returnAddress(), arg1, arg2); } @@ -105,7 +92,7 @@ const Fuzzer = struct { gpa: Allocator, rng: std.Random.DefaultPrng, input: std.ArrayListUnmanaged(u8), - flagged_pcs: []const FlaggedPc, + pcs: []const usize, pc_counters: []u8, n_runs: usize, recent_cases: RunMap, @@ -174,32 +161,18 @@ const Fuzzer = struct { } }; - const FlaggedPc = extern struct { - addr: usize, - flags: packed struct(usize) { - entry: bool, - _: @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }), - }, - }; - const Analysis = struct { score: usize, id: Run.Id, }; - fn init(f: *Fuzzer, cache_dir: std.fs.Dir) !void { - const flagged_pcs = f.flagged_pcs; - + fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void { f.cache_dir = cache_dir; + f.pc_counters = pc_counters; + f.pcs = pcs; // Choose a file name for the coverage based on a hash of the PCs that will be stored within. 
- const pc_digest = d: { - var hasher = std.hash.Wyhash.init(0); - for (flagged_pcs) |flagged_pc| { - hasher.update(std.mem.asBytes(&flagged_pc.addr)); - } - break :d f.coverage.run_id_hasher.final(); - }; + const pc_digest = std.hash.Wyhash.hash(0, std.mem.sliceAsBytes(pcs)); f.coverage_id = pc_digest; const hex_digest = std.fmt.hex(pc_digest); const coverage_file_path = "v/" ++ hex_digest; @@ -213,12 +186,12 @@ const Fuzzer = struct { .truncate = false, }); defer coverage_file.close(); - const n_bitset_elems = (flagged_pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize); + const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize); comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); comptime assert(SeenPcsHeader.trailing[1] == .pc_addr); const bytes_len = @sizeOf(SeenPcsHeader) + n_bitset_elems * @sizeOf(usize) + - flagged_pcs.len * @sizeOf(usize); + pcs.len * @sizeOf(usize); const existing_len = coverage_file.getEndPos() catch |err| { fatal("unable to check len of coverage file: {s}", .{@errorName(err)}); }; @@ -233,12 +206,12 @@ const Fuzzer = struct { fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); }; if (existing_len != 0) { - const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. flagged_pcs.len * @sizeOf(usize)]; + const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)]; const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); - for (existing_pcs, flagged_pcs, 0..) |old, new, i| { - if (old != new.addr) { + for (existing_pcs, pcs, 0..) |old, new, i| { + if (old != new) { fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ - i, old, new.addr, + i, old, new, }); } } @@ -246,14 +219,12 @@ const Fuzzer = struct { const header: SeenPcsHeader = .{ .n_runs = 0, .unique_runs = 0, - .pcs_len = flagged_pcs.len, + .pcs_len = pcs.len, .lowest_stack = std.math.maxInt(usize), }; f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header)); f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems * @sizeOf(usize)); - for (flagged_pcs) |flagged_pc| { - f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr)); - } + f.seen_pcs.appendSliceAssumeCapacity(std.mem.sliceAsBytes(pcs)); } } @@ -307,8 +278,8 @@ const Fuzzer = struct { // Track code coverage from all runs. 
comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]); - const remainder = f.flagged_pcs.len % @bitSizeOf(usize); - const aligned_len = f.flagged_pcs.len - remainder; + const remainder = f.pcs.len % @bitSizeOf(usize); + const aligned_len = f.pcs.len - remainder; const seen_pcs = header_end_ptr[0..aligned_len]; const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]); const V = @Vector(@bitSizeOf(usize), u8); @@ -433,7 +404,7 @@ var fuzzer: Fuzzer = .{ .gpa = general_purpose_allocator.allocator(), .rng = std.Random.DefaultPrng.init(0), .input = .{}, - .flagged_pcs = undefined, + .pcs = undefined, .pc_counters = undefined, .n_runs = 0, .recent_cases = .{}, @@ -455,8 +426,32 @@ export fn fuzzer_next() Fuzzer.Slice { } export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { - if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{}); - if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{}); + // Linkers are expected to automatically add `__start_
<section>` and + // `__stop_<section>
` symbols when section names are valid C identifiers. + + const pc_counters_start = @extern([*]u8, .{ + .name = "__start___sancov_cntrs", + .linkage = .weak, + }) orelse fatal("missing __start___sancov_cntrs symbol"); + + const pc_counters_end = @extern([*]u8, .{ + .name = "__stop___sancov_cntrs", + .linkage = .weak, + }) orelse fatal("missing __stop___sancov_cntrs symbol"); + + const pc_counters = pc_counters_start[0 .. pc_counters_end - pc_counters_start]; + + const pcs_start = @extern([*]usize, .{ + .name = "__start___sancov_pcs1", + .linkage = .weak, + }) orelse fatal("missing __start___sancov_pcs1 symbol"); + + const pcs_end = @extern([*]usize, .{ + .name = "__stop___sancov_pcs1", + .linkage = .weak, + }) orelse fatal("missing __stop___sancov_pcs1 symbol"); + + const pcs = pcs_start[0 .. pcs_end - pcs_start]; const cache_dir_path = cache_dir_struct.toZig(); const cache_dir = if (cache_dir_path.len == 0) @@ -466,7 +461,8 @@ export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void { fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) }); }; - fuzzer.init(cache_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)}); + fuzzer.init(cache_dir, pc_counters, pcs) catch |err| + fatal("unable to init fuzzer: {s}", .{@errorName(err)}); } /// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping. diff --git a/src/Air.zig b/src/Air.zig index 5bbde22251..d64fe2983d 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -1126,7 +1126,9 @@ pub const CondBr = struct { pub const BranchHints = packed struct(u32) { true: std.builtin.BranchHint, false: std.builtin.BranchHint, - _: u26 = 0, + then_cov: CoveragePoint, + else_cov: CoveragePoint, + _: u24 = 0, }; }; @@ -1903,3 +1905,12 @@ pub fn unwrapSwitch(air: *const Air, switch_inst: Inst.Index) UnwrappedSwitch { pub const typesFullyResolved = types_resolved.typesFullyResolved; pub const typeFullyResolved = types_resolved.checkType; pub const valFullyResolved = types_resolved.checkVal; + +pub const CoveragePoint = enum(u1) { + /// Indicates the block is not a place of interest corresponding to + /// a source location for coverage purposes. + none, + /// Point of interest. The next instruction emitted corresponds to + /// a source location used for coverage instrumentation. + poi, +}; diff --git a/src/Sema.zig b/src/Sema.zig index 85a1e90a48..491e6ec491 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -6898,8 +6898,14 @@ fn popErrorReturnTrace( .payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(then_block.instructions.items.len), .else_body_len = @intCast(else_block.instructions.items.len), - // weight against error branch - .branch_hints = .{ .true = .likely, .false = .unlikely }, + .branch_hints = .{ + // Weight against error branch. + .true = .likely, + .false = .unlikely, + // Code coverage is not valuable on either branch. 
+ .then_cov = .none, + .else_cov = .none, + }, }), }, }, @@ -11796,14 +11802,22 @@ fn zirSwitchBlockErrUnion(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Comp _ = try child_block.addInst(.{ .tag = .cond_br, - .data = .{ .pl_op = .{ - .operand = cond, - .payload = sema.addExtraAssumeCapacity(Air.CondBr{ - .then_body_len = @intCast(true_instructions.len), - .else_body_len = @intCast(sub_block.instructions.items.len), - .branch_hints = .{ .true = non_error_hint, .false = .none }, - }), - } }, + .data = .{ + .pl_op = .{ + .operand = cond, + .payload = sema.addExtraAssumeCapacity(Air.CondBr{ + .then_body_len = @intCast(true_instructions.len), + .else_body_len = @intCast(sub_block.instructions.items.len), + .branch_hints = .{ + .true = non_error_hint, + .false = .none, + // Code coverage is desired for error handling. + .then_cov = .poi, + .else_cov = .poi, + }, + }), + }, + }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(true_instructions)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(sub_block.instructions.items)); @@ -12853,7 +12867,13 @@ fn analyzeSwitchRuntimeBlock( sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(prev_then_body.len), .else_body_len = @intCast(cond_body.len), - .branch_hints = .{ .true = prev_hint, .false = .none }, + .branch_hints = .{ + .true = prev_hint, + .false = .none, + // Code coverage is desired for error handling. + .then_cov = .poi, + .else_cov = .poi, + }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(prev_then_body)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(cond_body)); @@ -13133,7 +13153,12 @@ fn analyzeSwitchRuntimeBlock( sema.air_instructions.items(.data)[@intFromEnum(prev_cond_br)].pl_op.payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(prev_then_body.len), .else_body_len = @intCast(case_block.instructions.items.len), - .branch_hints = .{ .true = prev_hint, .false = else_hint }, + .branch_hints = .{ + .true = prev_hint, + .false = else_hint, + .then_cov = .poi, + .else_cov = .poi, + }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(prev_then_body)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(case_block.instructions.items)); @@ -19250,7 +19275,17 @@ fn zirBoolBr( &else_block, lhs, block_inst, - if (is_bool_or) .{ .true = .none, .false = rhs_hint } else .{ .true = rhs_hint, .false = .none }, + if (is_bool_or) .{ + .true = .none, + .false = rhs_hint, + .then_cov = .poi, + .else_cov = .poi, + } else .{ + .true = rhs_hint, + .false = .none, + .then_cov = .poi, + .else_cov = .poi, + }, ); if (!rhs_noret) { if (try sema.resolveDefinedValue(rhs_block, rhs_src, coerced_rhs_result)) |rhs_val| { @@ -19467,14 +19502,22 @@ fn zirCondbr( true_instructions.len + sub_block.instructions.items.len); _ = try parent_block.addInst(.{ .tag = .cond_br, - .data = .{ .pl_op = .{ - .operand = cond, - .payload = sema.addExtraAssumeCapacity(Air.CondBr{ - .then_body_len = @intCast(true_instructions.len), - .else_body_len = @intCast(sub_block.instructions.items.len), - .branch_hints = .{ .true = true_hint, .false = false_hint }, - }), - } }, + .data = .{ + .pl_op = .{ + .operand = cond, + .payload = sema.addExtraAssumeCapacity(Air.CondBr{ + .then_body_len = @intCast(true_instructions.len), + .else_body_len = @intCast(sub_block.instructions.items.len), + .branch_hints = .{ + .true = true_hint, + .false = false_hint, + // Code coverage is desired for error handling. 
+ .then_cov = .poi, + .else_cov = .poi, + }, + }), + }, + }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(true_instructions)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(sub_block.instructions.items)); @@ -19851,8 +19894,14 @@ fn retWithErrTracing( const cond_br_payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = @intCast(then_block.instructions.items.len), .else_body_len = @intCast(else_block.instructions.items.len), - // weight against error branch - .branch_hints = .{ .true = .likely, .false = .unlikely }, + .branch_hints = .{ + // Weight against error branch. + .true = .likely, + .false = .unlikely, + // Code coverage is not valuable on either branch. + .then_cov = .none, + .else_cov = .none, + }, }); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(then_block.instructions.items)); sema.air_extra.appendSliceAssumeCapacity(@ptrCast(else_block.instructions.items)); @@ -27473,8 +27522,14 @@ fn addSafetyCheckExtra( .payload = sema.addExtraAssumeCapacity(Air.CondBr{ .then_body_len = 1, .else_body_len = @intCast(fail_block.instructions.items.len), - // safety check failure branch is cold - .branch_hints = .{ .true = .likely, .false = .cold }, + .branch_hints = .{ + // Safety check failure branch is cold. + .true = .likely, + .false = .cold, + // Code coverage not wanted for panic branches. + .then_cov = .none, + .else_cov = .none, + }, }), }, }, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index e7cb57d76e..2b207a6b0f 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1275,7 +1275,7 @@ pub const Object = struct { .is_small = options.is_small, .time_report = options.time_report, .tsan = options.sanitize_thread, - .sancov = options.fuzz, + .sancov = sanCovPassEnabled(comp.config.san_cov_trace_pc_guard), .lto = options.lto, .asm_filename = null, .bin_filename = options.bin_path, @@ -1283,19 +1283,19 @@ pub const Object = struct { .bitcode_filename = null, .coverage = .{ .CoverageType = .Edge, - .IndirectCalls = true, + .IndirectCalls = false, .TraceBB = false, - .TraceCmp = true, + .TraceCmp = false, .TraceDiv = false, .TraceGep = false, .Use8bitCounters = false, .TracePC = false, .TracePCGuard = comp.config.san_cov_trace_pc_guard, - .Inline8bitCounters = true, + .Inline8bitCounters = false, .InlineBoolFlag = false, - .PCTable = true, + .PCTable = false, .NoPrune = false, - .StackDepth = true, + .StackDepth = false, .TraceLoads = false, .TraceStores = false, .CollectControlFlow = false, @@ -1655,6 +1655,25 @@ pub const Object = struct { break :debug_info .{ file, subprogram }; } else .{.none} ** 2; + const fuzz: ?FuncGen.Fuzz = f: { + if (!owner_mod.fuzz) break :f null; + if (func_analysis.disable_instrumentation) break :f null; + if (is_naked) break :f null; + + // The void type used here is a placeholder to be replaced with an + // array of the appropriate size after the POI count is known. 
+ + const counters_variable = try o.builder.addVariable(.empty, .void, .default); + counters_variable.setLinkage(.private, &o.builder); + counters_variable.setAlignment(comptime Builder.Alignment.fromByteUnits(1), &o.builder); + counters_variable.setSection(try o.builder.string("__sancov_cntrs"), &o.builder); + + break :f .{ + .counters_variable = counters_variable, + .pcs = .{}, + }; + }; + var fg: FuncGen = .{ .gpa = gpa, .air = air, @@ -1662,6 +1681,7 @@ pub const Object = struct { .ng = &ng, .wip = wip, .is_naked = fn_info.cc == .Naked, + .fuzz = fuzz, .ret_ptr = ret_ptr, .args = args.items, .arg_index = 0, @@ -1679,7 +1699,7 @@ pub const Object = struct { defer fg.deinit(); deinit_wip = false; - fg.genBody(air.getMainBody()) catch |err| switch (err) { + fg.genBody(air.getMainBody(), .poi) catch |err| switch (err) { error.CodegenFail => { try zcu.failed_codegen.put(zcu.gpa, func.owner_nav, ng.err_msg.?); ng.err_msg = null; @@ -1688,6 +1708,24 @@ pub const Object = struct { else => |e| return e, }; + if (fg.fuzz) |*f| { + { + const array_llvm_ty = try o.builder.arrayType(f.pcs.items.len, .i8); + f.counters_variable.ptrConst(&o.builder).global.ptr(&o.builder).type = array_llvm_ty; + const zero_init = try o.builder.zeroInitConst(array_llvm_ty); + try f.counters_variable.setInitializer(zero_init, &o.builder); + } + + const array_llvm_ty = try o.builder.arrayType(f.pcs.items.len, .ptr); + const init_val = try o.builder.arrayConst(array_llvm_ty, f.pcs.items); + const pcs_variable = try o.builder.addVariable(.empty, array_llvm_ty, .default); + pcs_variable.setLinkage(.private, &o.builder); + pcs_variable.setMutability(.constant, &o.builder); + pcs_variable.setAlignment(Type.usize.abiAlignment(zcu).toLlvm(), &o.builder); + pcs_variable.setSection(try o.builder.string("__sancov_pcs1"), &o.builder); + try pcs_variable.setInitializer(init_val, &o.builder); + } + try fg.wip.finish(); } @@ -4729,6 +4767,7 @@ pub const FuncGen = struct { liveness: Liveness, wip: Builder.WipFunction, is_naked: bool, + fuzz: ?Fuzz, file: Builder.Metadata, scope: Builder.Metadata, @@ -4769,6 +4808,16 @@ pub const FuncGen = struct { sync_scope: Builder.SyncScope, + const Fuzz = struct { + counters_variable: Builder.Variable.Index, + pcs: std.ArrayListUnmanaged(Builder.Constant), + + fn deinit(f: *Fuzz, gpa: Allocator) void { + f.pcs.deinit(gpa); + f.* = undefined; + } + }; + const BreakList = union { list: std.MultiArrayList(struct { bb: Builder.Function.Block.Index, @@ -4778,9 +4827,11 @@ pub const FuncGen = struct { }; fn deinit(self: *FuncGen) void { + const gpa = self.gpa; + if (self.fuzz) |*f| f.deinit(self.gpa); self.wip.deinit(); - self.func_inst_table.deinit(self.gpa); - self.blocks.deinit(self.gpa); + self.func_inst_table.deinit(gpa); + self.blocks.deinit(gpa); } fn todo(self: *FuncGen, comptime format: []const u8, args: anytype) Error { @@ -4836,11 +4887,33 @@ pub const FuncGen = struct { return o.null_opt_usize; } - fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void { + fn genBody(self: *FuncGen, body: []const Air.Inst.Index, coverage_point: Air.CoveragePoint) Error!void { const o = self.ng.object; const zcu = o.pt.zcu; const ip = &zcu.intern_pool; const air_tags = self.air.instructions.items(.tag); + switch (coverage_point) { + .none => {}, + .poi => if (self.fuzz) |*fuzz| { + const poi_index = fuzz.pcs.items.len; + const base_ptr = fuzz.counters_variable.toValue(&o.builder); + const ptr = if (poi_index == 0) base_ptr else try self.wip.gep(.inbounds, .i8, base_ptr, &.{ + try 
o.builder.intValue(.i32, poi_index), + }, ""); + const counter = try self.wip.load(.normal, .i8, ptr, .default, ""); + const one = try o.builder.intValue(.i8, 1); + const counter_incremented = try self.wip.bin(.add, counter, one, ""); + _ = try self.wip.store(.normal, counter_incremented, ptr, .default); + + // LLVM does not allow blockaddress on the entry block. + const pc = if (self.wip.cursor.block == .entry) + self.wip.function.toConst(&o.builder) + else + try o.builder.blockAddrConst(self.wip.function, self.wip.cursor.block); + const gpa = self.gpa; + try fuzz.pcs.append(gpa, pc); + }, + } for (body, 0..) |inst, i| { if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; @@ -4949,7 +5022,7 @@ pub const FuncGen = struct { .ret_ptr => try self.airRetPtr(inst), .arg => try self.airArg(inst), .bitcast => try self.airBitCast(inst), - .int_from_bool => try self.airIntFromBool(inst), + .int_from_bool => try self.airIntFromBool(inst), .block => try self.airBlock(inst), .br => try self.airBr(inst), .switch_br => try self.airSwitchBr(inst), @@ -4966,7 +5039,7 @@ pub const FuncGen = struct { .trunc => try self.airTrunc(inst), .fptrunc => try self.airFptrunc(inst), .fpext => try self.airFpext(inst), - .int_from_ptr => try self.airIntFromPtr(inst), + .int_from_ptr => try self.airIntFromPtr(inst), .load => try self.airLoad(body[i..]), .loop => try self.airLoop(inst), .not => try self.airNot(inst), @@ -5089,8 +5162,13 @@ pub const FuncGen = struct { } } - fn genBodyDebugScope(self: *FuncGen, maybe_inline_func: ?InternPool.Index, body: []const Air.Inst.Index) Error!void { - if (self.wip.strip) return self.genBody(body); + fn genBodyDebugScope( + self: *FuncGen, + maybe_inline_func: ?InternPool.Index, + body: []const Air.Inst.Index, + coverage_point: Air.CoveragePoint, + ) Error!void { + if (self.wip.strip) return self.genBody(body, coverage_point); const old_file = self.file; const old_inlined = self.inlined; @@ -5137,7 +5215,8 @@ pub const FuncGen = struct { .sp_flags = .{ .Optimized = mod.optimize_mode != .Debug, .Definition = true, - .LocalToUnit = true, // TODO: we can't know this at this point, since the function could be exported later! + // TODO: we can't know this at this point, since the function could be exported later! 
+ .LocalToUnit = true, }, }, o.debug_compile_unit, @@ -5171,7 +5250,7 @@ pub const FuncGen = struct { .no_location => {}, }; - try self.genBody(body); + try self.genBody(body, coverage_point); } pub const CallAttr = enum { @@ -5881,7 +5960,7 @@ pub const FuncGen = struct { const inst_ty = self.typeOfIndex(inst); if (inst_ty.isNoReturn(zcu)) { - try self.genBodyDebugScope(maybe_inline_func, body); + try self.genBodyDebugScope(maybe_inline_func, body, .none); return .none; } @@ -5897,7 +5976,7 @@ pub const FuncGen = struct { }); defer assert(self.blocks.remove(inst)); - try self.genBodyDebugScope(maybe_inline_func, body); + try self.genBodyDebugScope(maybe_inline_func, body, .none); self.wip.cursor = .{ .block = parent_bb }; @@ -5996,11 +6075,11 @@ pub const FuncGen = struct { self.wip.cursor = .{ .block = then_block }; if (hint == .then_cold) _ = try self.wip.callIntrinsicAssumeCold(); - try self.genBodyDebugScope(null, then_body); + try self.genBodyDebugScope(null, then_body, extra.data.branch_hints.then_cov); self.wip.cursor = .{ .block = else_block }; if (hint == .else_cold) _ = try self.wip.callIntrinsicAssumeCold(); - try self.genBodyDebugScope(null, else_body); + try self.genBodyDebugScope(null, else_body, extra.data.branch_hints.else_cov); // No need to reset the insert cursor since this instruction is noreturn. return .none; @@ -6085,7 +6164,7 @@ pub const FuncGen = struct { fg.wip.cursor = .{ .block = return_block }; if (err_cold) _ = try fg.wip.callIntrinsicAssumeCold(); - try fg.genBodyDebugScope(null, body); + try fg.genBodyDebugScope(null, body, .poi); fg.wip.cursor = .{ .block = continue_block }; } @@ -6196,14 +6275,14 @@ pub const FuncGen = struct { } self.wip.cursor = .{ .block = case_block }; if (switch_br.getHint(case.idx) == .cold) _ = try self.wip.callIntrinsicAssumeCold(); - try self.genBodyDebugScope(null, case.body); + try self.genBodyDebugScope(null, case.body, .poi); } const else_body = it.elseBody(); self.wip.cursor = .{ .block = else_block }; if (switch_br.getElseHint() == .cold) _ = try self.wip.callIntrinsicAssumeCold(); if (else_body.len != 0) { - try self.genBodyDebugScope(null, else_body); + try self.genBodyDebugScope(null, else_body, .poi); } else { _ = try self.wip.@"unreachable"(); } @@ -6222,7 +6301,7 @@ pub const FuncGen = struct { _ = try self.wip.br(loop_block); self.wip.cursor = .{ .block = loop_block }; - try self.genBodyDebugScope(null, body); + try self.genBodyDebugScope(null, body, .none); // TODO instead of this logic, change AIR to have the property that // every block is guaranteed to end with a noreturn instruction. 
@@ -12194,3 +12273,7 @@ pub fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void { => unreachable, } } + +fn sanCovPassEnabled(trace_pc_guard: bool) bool { + return trace_pc_guard; +} diff --git a/src/print_air.zig b/src/print_air.zig index 49f7248589..227f362c39 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -795,6 +795,9 @@ const Writer = struct { if (extra.data.branch_hints.true != .none) { try s.print(" {s}", .{@tagName(extra.data.branch_hints.true)}); } + if (extra.data.branch_hints.then_cov != .none) { + try s.print(" {s}", .{@tagName(extra.data.branch_hints.then_cov)}); + } try s.writeAll(" {\n"); const old_indent = w.indent; w.indent += 2; @@ -814,6 +817,9 @@ const Writer = struct { if (extra.data.branch_hints.false != .none) { try s.print(" {s}", .{@tagName(extra.data.branch_hints.false)}); } + if (extra.data.branch_hints.else_cov != .none) { + try s.print(" {s}", .{@tagName(extra.data.branch_hints.else_cov)}); + } try s.writeAll(" {\n"); if (liveness_condbr.else_deaths.len != 0) { From 88bba4c15463796c0b89a4d097366b11bdb7679c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 14:36:28 -0700 Subject: [PATCH 5/9] LLVM: enable sancov pass partially It's useful to have TraceCmp based on the results of LLVM optimizations, while the code coverage bits were emitted by Zig manually, allowing more careful correlation to points of interest in the source code. This re-enables the sancov pass in `-ffuzz` mode, but only TraceCmp. Notably, IndirectCalls is off, which needs to be implemented manually in the LLVM backend, and StackDepth remains off, because it is not used by libfuzzer or AFL either. If stack depth is re-introduced, it can be done with better performance characteristics by being function call graph aware, and only lowered in call graph cycles, where its heuristic properties come in useful. Fixes the fuzzing regression. --- src/codegen/llvm.zig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 2b207a6b0f..f99018310a 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1275,7 +1275,7 @@ pub const Object = struct { .is_small = options.is_small, .time_report = options.time_report, .tsan = options.sanitize_thread, - .sancov = sanCovPassEnabled(comp.config.san_cov_trace_pc_guard), + .sancov = options.fuzz, .lto = options.lto, .asm_filename = null, .bin_filename = options.bin_path, @@ -1283,16 +1283,21 @@ pub const Object = struct { .bitcode_filename = null, .coverage = .{ .CoverageType = .Edge, + // Works in tandem with Inline8bitCounters or InlineBoolFlag. + // Zig does not yet implement its own version of this but it + // needs to for better fuzzing logic. .IndirectCalls = false, .TraceBB = false, - .TraceCmp = false, + .TraceCmp = true, .TraceDiv = false, .TraceGep = false, .Use8bitCounters = false, .TracePC = false, .TracePCGuard = comp.config.san_cov_trace_pc_guard, + // Zig emits its own inline 8-bit counters instrumentation. .Inline8bitCounters = false, .InlineBoolFlag = false, + // Zig emits its own PC table instrumentation. 
.PCTable = false, .NoPrune = false, .StackDepth = false, @@ -12273,7 +12278,3 @@ pub fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void { => unreachable, } } - -fn sanCovPassEnabled(trace_pc_guard: bool) bool { - return trace_pc_guard; -} From 1bec824cadba7870d6e2b52588d782f32be44866 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 14:52:57 -0700 Subject: [PATCH 6/9] LLVM: disable inline 8-bit counters when using trace pc guard --- lib/std/Build/Step/Compile.zig | 16 +++++++++++----- src/codegen/llvm.zig | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 1aeebbb55b..922d64c728 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -218,12 +218,18 @@ no_builtin: bool = false, /// Managed by the build runner, not user build script. zig_process: ?*Step.ZigProcess, -/// Enables deprecated coverage instrumentation that is only useful if you -/// are using third party fuzzers that depend on it. Otherwise, slows down -/// the instrumented binary with unnecessary function calls. +/// Enables coverage instrumentation that is only useful if you are using third +/// party fuzzers that depend on it. Otherwise, slows down the instrumented +/// binary with unnecessary function calls. /// -/// To enable fuzz testing instrumentation on a compilation, see the `fuzz` -/// flag in `Module`. +/// This kind of coverage instrumentation is used by AFLplusplus v4.21c, +/// however, modern fuzzers - including Zig - have switched to using "inline +/// 8-bit counters" or "inline bool flag" which incurs only a single +/// instruction for coverage, along with "trace cmp" which instruments +/// comparisons and reports the operands. +/// +/// To instead enable fuzz testing instrumentation on a compilation using Zig's +/// builtin fuzzer, see the `fuzz` flag in `Module`. sanitize_coverage_trace_pc_guard: ?bool = null, pub const ExpectedCompileErrors = union(enum) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index f99018310a..d59fdd42a3 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1664,6 +1664,7 @@ pub const Object = struct { if (!owner_mod.fuzz) break :f null; if (func_analysis.disable_instrumentation) break :f null; if (is_naked) break :f null; + if (comp.config.san_cov_trace_pc_guard) break :f null; // The void type used here is a placeholder to be replaced with an // array of the appropriate size after the POI count is known. From 9e11c4f60ea56af50bed7bb27984307762d5f167 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 15:18:42 -0700 Subject: [PATCH 7/9] LLVM: put sancov globals into llvm.compiler.used This matches what LLVM's sancov pass does and is required so that optimization passes do not delete the instrumentation. However, this is currently triggering an error: "members of llvm.compiler.used must be named" so the next commit will add names to those globals. --- src/codegen/llvm.zig | 72 +++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index d59fdd42a3..b78e2addef 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -822,6 +822,9 @@ pub const Object = struct { /// This is denormalized data. struct_field_map: std.AutoHashMapUnmanaged(ZigStructField, c_uint), + /// Values for `@llvm.compiler.used`. 
+ compiler_used: std.ArrayListUnmanaged(Builder.Constant), + const ZigStructField = struct { struct_ty: InternPool.Index, field_index: u32, @@ -975,6 +978,7 @@ pub const Object = struct { .error_name_table = .none, .null_opt_usize = .no_init, .struct_field_map = .{}, + .compiler_used = .{}, }; return obj; } @@ -1097,44 +1101,57 @@ pub const Object = struct { lto: bool, }; - pub fn emit(self: *Object, options: EmitOptions) !void { - const zcu = self.pt.zcu; + pub fn emit(o: *Object, options: EmitOptions) !void { + const zcu = o.pt.zcu; const comp = zcu.comp; { - try self.genErrorNameTable(); - try self.genCmpLtErrorsLenFunction(); - try self.genModuleLevelAssembly(); + try o.genErrorNameTable(); + try o.genCmpLtErrorsLenFunction(); + try o.genModuleLevelAssembly(); - if (!self.builder.strip) { + if (o.compiler_used.items.len > 0) { + const array_llvm_ty = try o.builder.arrayType(o.compiler_used.items.len, .ptr); + const init_val = try o.builder.arrayConst(array_llvm_ty, o.compiler_used.items); + const compiler_used_variable = try o.builder.addVariable( + try o.builder.strtabString("llvm.compiler.used"), + array_llvm_ty, + .default, + ); + compiler_used_variable.setLinkage(.appending, &o.builder); + compiler_used_variable.setSection(try o.builder.string("llvm.metadata"), &o.builder); + try compiler_used_variable.setInitializer(init_val, &o.builder); + } + + if (!o.builder.strip) { { var i: usize = 0; - while (i < self.debug_unresolved_namespace_scopes.count()) : (i += 1) { - const namespace_index = self.debug_unresolved_namespace_scopes.keys()[i]; - const fwd_ref = self.debug_unresolved_namespace_scopes.values()[i]; + while (i < o.debug_unresolved_namespace_scopes.count()) : (i += 1) { + const namespace_index = o.debug_unresolved_namespace_scopes.keys()[i]; + const fwd_ref = o.debug_unresolved_namespace_scopes.values()[i]; const namespace = zcu.namespacePtr(namespace_index); - const debug_type = try self.lowerDebugType(Type.fromInterned(namespace.owner_type)); + const debug_type = try o.lowerDebugType(Type.fromInterned(namespace.owner_type)); - self.builder.debugForwardReferenceSetType(fwd_ref, debug_type); + o.builder.debugForwardReferenceSetType(fwd_ref, debug_type); } } - self.builder.debugForwardReferenceSetType( - self.debug_enums_fwd_ref, - try self.builder.metadataTuple(self.debug_enums.items), + o.builder.debugForwardReferenceSetType( + o.debug_enums_fwd_ref, + try o.builder.metadataTuple(o.debug_enums.items), ); - self.builder.debugForwardReferenceSetType( - self.debug_globals_fwd_ref, - try self.builder.metadataTuple(self.debug_globals.items), + o.builder.debugForwardReferenceSetType( + o.debug_globals_fwd_ref, + try o.builder.metadataTuple(o.debug_globals.items), ); } } const target_triple_sentinel = - try self.gpa.dupeZ(u8, self.builder.target_triple.slice(&self.builder).?); - defer self.gpa.free(target_triple_sentinel); + try o.gpa.dupeZ(u8, o.builder.target_triple.slice(&o.builder).?); + defer o.gpa.free(target_triple_sentinel); const emit_asm_msg = options.asm_path orelse "(none)"; const emit_bin_msg = options.bin_path orelse "(none)"; @@ -1147,15 +1164,15 @@ pub const Object = struct { const context, const module = emit: { if (options.pre_ir_path) |path| { if (std.mem.eql(u8, path, "-")) { - self.builder.dump(); + o.builder.dump(); } else { - _ = try self.builder.printToFile(path); + _ = try o.builder.printToFile(path); } } - const bitcode = try self.builder.toBitcode(self.gpa); - defer self.gpa.free(bitcode); - self.builder.clearAndFree(); + const bitcode = try 
o.builder.toBitcode(o.gpa); + defer o.gpa.free(bitcode); + o.builder.clearAndFree(); if (options.pre_bc_path) |path| { var file = try std.fs.cwd().createFile(path, .{}); @@ -1707,7 +1724,7 @@ pub const Object = struct { fg.genBody(air.getMainBody(), .poi) catch |err| switch (err) { error.CodegenFail => { - try zcu.failed_codegen.put(zcu.gpa, func.owner_nav, ng.err_msg.?); + try zcu.failed_codegen.put(gpa, func.owner_nav, ng.err_msg.?); ng.err_msg = null; return; }, @@ -1730,6 +1747,11 @@ pub const Object = struct { pcs_variable.setAlignment(Type.usize.abiAlignment(zcu).toLlvm(), &o.builder); pcs_variable.setSection(try o.builder.string("__sancov_pcs1"), &o.builder); try pcs_variable.setInitializer(init_val, &o.builder); + + try o.compiler_used.appendSlice(gpa, &.{ + f.counters_variable.toConst(&o.builder), + pcs_variable.toConst(&o.builder), + }); } try fg.wip.finish(); From c81219c573f75a2163dfaae5bae7d7373e179f91 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 16:57:43 -0700 Subject: [PATCH 8/9] LLVM: use `@llvm.used` instead of `@llvm.compiler.used` because it marks the linker section, preventing garbage collection. Also, name the members because that is required by this intrinsic. Also, enable the StackDepth option in the sancov pass as a workaround for https://github.com/llvm/llvm-project/pull/106464, otherwise, LLVM enables TracePCGuard even though we explicitly disable it. --- src/codegen/llvm.zig | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index b78e2addef..6bf7476a4e 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -822,8 +822,8 @@ pub const Object = struct { /// This is denormalized data. struct_field_map: std.AutoHashMapUnmanaged(ZigStructField, c_uint), - /// Values for `@llvm.compiler.used`. - compiler_used: std.ArrayListUnmanaged(Builder.Constant), + /// Values for `@llvm.used`. + used: std.ArrayListUnmanaged(Builder.Constant), const ZigStructField = struct { struct_ty: InternPool.Index, @@ -978,7 +978,7 @@ pub const Object = struct { .error_name_table = .none, .null_opt_usize = .no_init, .struct_field_map = .{}, - .compiler_used = .{}, + .used = .{}, }; return obj; } @@ -1110,11 +1110,11 @@ pub const Object = struct { try o.genCmpLtErrorsLenFunction(); try o.genModuleLevelAssembly(); - if (o.compiler_used.items.len > 0) { - const array_llvm_ty = try o.builder.arrayType(o.compiler_used.items.len, .ptr); - const init_val = try o.builder.arrayConst(array_llvm_ty, o.compiler_used.items); + if (o.used.items.len > 0) { + const array_llvm_ty = try o.builder.arrayType(o.used.items.len, .ptr); + const init_val = try o.builder.arrayConst(array_llvm_ty, o.used.items); const compiler_used_variable = try o.builder.addVariable( - try o.builder.strtabString("llvm.compiler.used"), + try o.builder.strtabString("llvm.used"), array_llvm_ty, .default, ); @@ -1317,7 +1317,8 @@ pub const Object = struct { // Zig emits its own PC table instrumentation. .PCTable = false, .NoPrune = false, - .StackDepth = false, + // Workaround for https://github.com/llvm/llvm-project/pull/106464 + .StackDepth = true, .TraceLoads = false, .TraceStores = false, .CollectControlFlow = false, @@ -1686,7 +1687,10 @@ pub const Object = struct { // The void type used here is a placeholder to be replaced with an // array of the appropriate size after the POI count is known. 
- const counters_variable = try o.builder.addVariable(.empty, .void, .default); + // Due to error "members of llvm.compiler.used must be named", this global needs a name. + const anon_name = try o.builder.strtabStringFmt("__sancov_gen_.{d}", .{o.used.items.len}); + const counters_variable = try o.builder.addVariable(anon_name, .void, .default); + try o.used.append(gpa, counters_variable.toConst(&o.builder)); counters_variable.setLinkage(.private, &o.builder); counters_variable.setAlignment(comptime Builder.Alignment.fromByteUnits(1), &o.builder); counters_variable.setSection(try o.builder.string("__sancov_cntrs"), &o.builder); @@ -1741,17 +1745,15 @@ pub const Object = struct { const array_llvm_ty = try o.builder.arrayType(f.pcs.items.len, .ptr); const init_val = try o.builder.arrayConst(array_llvm_ty, f.pcs.items); - const pcs_variable = try o.builder.addVariable(.empty, array_llvm_ty, .default); + // Due to error "members of llvm.compiler.used must be named", this global needs a name. + const anon_name = try o.builder.strtabStringFmt("__sancov_gen_.{d}", .{o.used.items.len}); + const pcs_variable = try o.builder.addVariable(anon_name, array_llvm_ty, .default); + try o.used.append(gpa, pcs_variable.toConst(&o.builder)); pcs_variable.setLinkage(.private, &o.builder); pcs_variable.setMutability(.constant, &o.builder); pcs_variable.setAlignment(Type.usize.abiAlignment(zcu).toLlvm(), &o.builder); pcs_variable.setSection(try o.builder.string("__sancov_pcs1"), &o.builder); try pcs_variable.setInitializer(init_val, &o.builder); - - try o.compiler_used.appendSlice(gpa, &.{ - f.counters_variable.toConst(&o.builder), - pcs_variable.toConst(&o.builder), - }); } try fg.wip.finish(); From 13b5cee4cce2be7b5d1423fcd59b00ff1807142e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 28 Aug 2024 18:06:35 -0700 Subject: [PATCH 9/9] fuzzing: fix entry address logic * the pcs list is unsorted * use the function address Fixes entry points in ReleaseSafe mode. 
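For illustration only (not part of the patch): because the PC table emitted by the backend is in whatever order the instrumentation was generated rather than sorted, the web server can no longer binary search it; it has to scan for an exact match or, failing that, the largest PC at or below the entry address. A minimal standalone sketch of that lookup, using a hypothetical `bestPcIndex` helper and made-up addresses:

    const std = @import("std");

    /// Exact match wins; otherwise the largest PC still <= `addr`;
    /// null if every PC is above `addr`.
    fn bestPcIndex(pcs: []const usize, addr: usize) ?usize {
        var best: ?usize = null;
        for (pcs, 0..) |pc, i| {
            if (pc == addr) return i;
            if (pc > addr) continue;
            if (best == null or pc > pcs[best.?]) best = i;
        }
        return best;
    }

    test bestPcIndex {
        // Unsorted sample addresses (made up for this sketch).
        const pcs = [_]usize{ 0x500, 0x100, 0x300, 0x200 };
        try std.testing.expectEqual(@as(?usize, 2), bestPcIndex(&pcs, 0x350));
        try std.testing.expectEqual(@as(?usize, 0), bestPcIndex(&pcs, 0x500));
        try std.testing.expectEqual(@as(?usize, null), bestPcIndex(&pcs, 0x50));
    }

The patch below keeps this scan inline in `addEntryPoint` and has the test runner report the test function's address (`@intFromPtr(test_fn.func)`) rather than a `@returnAddress()` captured inside `fuzzInput`.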
--- lib/compiler/test_runner.zig | 3 +-- lib/std/Build/Fuzz/WebServer.zig | 17 +++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 836f70f931..ac9629a57d 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -166,6 +166,7 @@ fn mainServer() !void { if (log_err_count != 0) @panic("error logs detected"); if (first) { first = false; + const entry_addr = @intFromPtr(test_fn.func); try server.serveU64Message(.fuzz_start_addr, entry_addr); } } @@ -347,7 +348,6 @@ const FuzzerSlice = extern struct { }; var is_fuzz_test: bool = undefined; -var entry_addr: usize = 0; extern fn fuzzer_next() FuzzerSlice; extern fn fuzzer_init(cache_dir: FuzzerSlice) void; @@ -358,7 +358,6 @@ pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 { if (crippled) return ""; is_fuzz_test = true; if (builtin.fuzz) { - if (entry_addr == 0) entry_addr = @returnAddress(); return fuzzer_next().toSlice(); } if (options.corpus.len == 0) return ""; diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig index 26b25b83d9..a0ab018cf5 100644 --- a/lib/std/Build/Fuzz/WebServer.zig +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -664,11 +664,16 @@ fn addEntryPoint(ws: *WebServer, coverage_id: u64, addr: u64) error{ AlreadyRepo const coverage_map = ws.coverage_files.getPtr(coverage_id).?; const header: *const abi.SeenPcsHeader = @ptrCast(coverage_map.mapped_memory[0..@sizeOf(abi.SeenPcsHeader)]); const pcs = header.pcAddrs(); - const index = std.sort.upperBound(usize, pcs, addr, struct { - fn order(context: usize, item: usize) std.math.Order { - return std.math.order(item, context); + // Since this pcs list is unsorted, we must linear scan for the best index. + const index = i: { + var best: usize = 0; + for (pcs[1..], 1..) |elem_addr, i| { + if (elem_addr == addr) break :i i; + if (elem_addr > addr) continue; + if (elem_addr > pcs[best]) best = i; } - }.order); + break :i best; + }; if (index >= pcs.len) { log.err("unable to find unit test entry address 0x{x} in source locations (range: 0x{x} to 0x{x})", .{ addr, pcs[0], pcs[pcs.len - 1], @@ -678,8 +683,8 @@ fn addEntryPoint(ws: *WebServer, coverage_id: u64, addr: u64) error{ AlreadyRepo if (false) { const sl = coverage_map.source_locations[index]; const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); - log.debug("server found entry point for 0x{x} at {s}:{d}:{d}", .{ - addr, file_name, sl.line, sl.column, + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ + addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], }); } const gpa = ws.gpa;