From 97643c1ecc9621b088807f446f06fe51e2db8fb6 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Mon, 29 Jul 2024 21:44:38 -0700
Subject: [PATCH] fuzzer: track code coverage from all runs

When a unique run is encountered, track it in a bit set memory-mapped
into the fuzz directory so it can be observed by other processes, even
while the fuzzer is running.
---
Note: an illustrative sketch of reading the coverage file from another
process appears after the diff.

 lib/compiler/test_runner.zig |   2 +
 lib/fuzzer.zig               | 195 +++++++++++++++++++++++++++++++----
 2 files changed, 177 insertions(+), 20 deletions(-)

diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
index be793376a5..64f6f230fe 100644
--- a/lib/compiler/test_runner.zig
+++ b/lib/compiler/test_runner.zig
@@ -41,6 +41,7 @@ pub fn main() void {
     }
     fba.reset();
 
+    if (builtin.fuzz) fuzzer_init();
 
     if (listen) {
         return mainServer() catch @panic("internal test runner failure");
@@ -323,6 +324,7 @@ const FuzzerSlice = extern struct {
 var is_fuzz_test: bool = undefined;
 
 extern fn fuzzer_next() FuzzerSlice;
+extern fn fuzzer_init() void;
 
 pub fn fuzzInput(options: testing.FuzzInputOptions) []const u8 {
     @disableInstrumentation();
diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig
index 60876e0bfb..4673bf6e31 100644
--- a/lib/fuzzer.zig
+++ b/lib/fuzzer.zig
@@ -2,6 +2,7 @@ const builtin = @import("builtin");
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
+const fatal = std.process.fatal;
 
 pub const std_options = .{
     .logFn = logOverride,
@@ -17,7 +18,7 @@ fn logOverride(
 ) void {
     if (builtin.mode != .Debug) return;
     const f = if (log_file) |f| f else f: {
-        const f = std.fs.cwd().createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
+        const f = fuzzer.dir.createFile("libfuzzer.log", .{}) catch @panic("failed to open fuzzer log file");
         log_file = f;
         break :f f;
     };
@@ -28,16 +29,17 @@ fn logOverride(
 
 export threadlocal var __sancov_lowest_stack: usize = 0;
 
-export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, stop: [*]u8) void {
-    std.log.debug("__sanitizer_cov_8bit_counters_init start={*}, stop={*}", .{ start, stop });
+// Each of these callbacks is expected to fire exactly once, for the single
+// instrumented module that contains the fuzz test; the asserts enforce that.
+var module_count_8bc: usize = 0;
+var module_count_pcs: usize = 0;
+
+export fn __sanitizer_cov_8bit_counters_init(start: [*]u8, end: [*]u8) void {
+    assert(@atomicRmw(usize, &module_count_8bc, .Add, 1, .monotonic) == 0);
+    fuzzer.pc_counters = start[0 .. end - start];
 }
 
-export fn __sanitizer_cov_pcs_init(pc_start: [*]const usize, pc_end: [*]const usize) void {
-    std.log.debug("__sanitizer_cov_pcs_init pc_start={*}, pc_end={*}", .{ pc_start, pc_end });
-    fuzzer.pc_range = .{
-        .start = @intFromPtr(pc_start),
-        .end = @intFromPtr(pc_start),
-    };
+export fn __sanitizer_cov_pcs_init(start: [*]const Fuzzer.FlaggedPc, end: [*]const Fuzzer.FlaggedPc) void {
+    assert(@atomicRmw(usize, &module_count_pcs, .Add, 1, .monotonic) == 0);
+    fuzzer.flagged_pcs = start[0 .. end - start];
 }
 
 export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
@@ -102,11 +104,25 @@ const Fuzzer = struct {
     gpa: Allocator,
     rng: std.Random.DefaultPrng,
     input: std.ArrayListUnmanaged(u8),
-    pc_range: PcRange,
-    count: usize,
+    flagged_pcs: []const FlaggedPc,
+    pc_counters: []u8,
+    n_runs: usize,
     recent_cases: RunMap,
     deduplicated_runs: usize,
+    /// Data collected from code coverage instrumentation from one execution of
+    /// the test function.
    coverage: Coverage,
+    /// Tracks which PCs have been seen across all runs that do not crash
+    /// the fuzzer process.
+    /// Stored in a memory-mapped file so that it can be shared with other
+    /// processes and viewed while the fuzzer is running.
+    seen_pcs: MemoryMappedList,
+    dir: std.fs.Dir,
+
+    const SeenPcsHeader = extern struct {
+        n_runs: usize,
+        pcs_len: usize,
+        lowest_stack: usize,
+    };
 
     const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false);
 
@@ -161,9 +177,12 @@
         }
     };
 
-    const PcRange = struct {
-        start: usize,
-        end: usize,
+    const FlaggedPc = extern struct {
+        addr: usize,
+        flags: packed struct(usize) {
+            entry: bool,
+            _: @Type(.{ .Int = .{ .signedness = .unsigned, .bits = @bitSizeOf(usize) - 1 } }),
+        },
     };
 
     const Analysis = struct {
@@ -171,6 +190,56 @@
         id: Run.Id,
     };
 
+    fn init(f: *Fuzzer, dir: std.fs.Dir) !void {
+        f.dir = dir;
+
+        // Layout of this file:
+        // - Header
+        // - list of PC addresses (usize elements)
+        // - list of hit flags, 1 bit per address (stored in u8 elements)
+        const coverage_file = dir.createFile("coverage", .{
+            .read = true,
+            .truncate = false,
+        }) catch |err| fatal("unable to create coverage file: {s}", .{@errorName(err)});
+        const flagged_pcs = f.flagged_pcs;
+        const n_bitset_elems = (flagged_pcs.len + 7) / 8;
+        const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems;
+        const existing_len = coverage_file.getEndPos() catch |err| {
+            fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
+        };
+        if (existing_len == 0) {
+            coverage_file.setEndPos(bytes_len) catch |err| {
+                fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
+            };
+        } else if (existing_len != bytes_len) {
+            fatal("incompatible existing coverage file (differing lengths)", .{});
+        }
+        f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
+            fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
+        };
+        if (existing_len != 0) {
+            // A nonempty file from a previous session is only reusable if it
+            // was produced by the same set of instrumented PCs.
+            const existing_pcs = std.mem.bytesAsSlice(usize, f.seen_pcs.items[@sizeOf(SeenPcsHeader)..][0 .. flagged_pcs.len * @sizeOf(usize)]);
+            for (existing_pcs, flagged_pcs, 0..) |old, new, i| {
+                if (old != new.addr) {
+                    fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
+                        i, old, new.addr,
+                    });
+                }
+            }
+        } else {
+            const header: SeenPcsHeader = .{
+                .n_runs = 0,
+                .pcs_len = flagged_pcs.len,
+                .lowest_stack = std.math.maxInt(usize),
+            };
+            f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
+            for (flagged_pcs) |flagged_pc| {
+                f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&flagged_pc.addr));
+            }
+            f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems);
+        }
+    }
+
     fn analyzeLastRun(f: *Fuzzer) Analysis {
         return .{
             .id = f.coverage.run_id_hasher.final(),
@@ -194,7 +263,7 @@
                 .score = 0,
             }, {});
         } else {
-            if (f.count % 1000 == 0) f.dumpStats();
+            if (f.n_runs % 1000 == 0) f.dumpStats();
 
             const analysis = f.analyzeLastRun();
             const gop = f.recent_cases.getOrPutAssumeCapacity(.{
@@ -217,6 +286,25 @@
                     .input = try gpa.dupe(u8, f.input.items),
                     .score = analysis.score,
                 };
+
+                // Track code coverage from all runs.
+                {
+                    const seen_pcs = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + f.flagged_pcs.len * @sizeOf(usize) ..];
+                    for (seen_pcs, 0..) |*elem, i| {
+                        // Bit i of the set corresponds to flagged_pcs[i], so
+                        // each byte aggregates 8 consecutive PC counters.
+                        const byte_i = i * 8;
+                        const mask: u8 =
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 0] != 0)) << 0) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 1] != 0)) << 1) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 2] != 0)) << 2) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 3] != 0)) << 3) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 4] != 0)) << 4) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 5] != 0)) << 5) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 6] != 0)) << 6) |
+                            (@as(u8, @intFromBool(f.pc_counters[byte_i + 7] != 0)) << 7);
+
+                        _ = @atomicRmw(u8, elem, .Or, mask, .monotonic);
+                    }
+                }
             }
 
             if (f.recent_cases.entries.len >= 100) {
@@ -244,8 +332,12 @@
         f.input.appendSliceAssumeCapacity(run.input);
         try f.mutate();
 
+        f.n_runs += 1;
+        const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
+        _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
+        _ = @atomicRmw(usize, &header.lowest_stack, .Min, __sancov_lowest_stack, .monotonic);
+        @memset(f.pc_counters, 0);
         f.coverage.reset();
-        f.count += 1;
         return f.input.items;
     }
 
@@ -257,8 +349,7 @@
 
     fn dumpStats(f: *Fuzzer) void {
         std.log.info("stats: runs={d} deduplicated={d}", .{
-            f.count,
-            f.deduplicated_runs,
+            f.n_runs, f.deduplicated_runs,
         });
         for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| {
             std.log.info("best[{d}] id={x} score={d} input: '{}'", .{
@@ -303,11 +394,14 @@
 var fuzzer: Fuzzer = .{
     .gpa = general_purpose_allocator.allocator(),
     .rng = std.Random.DefaultPrng.init(0),
     .input = .{},
-    .pc_range = .{ .start = 0, .end = 0 },
-    .count = 0,
+    .flagged_pcs = undefined,
+    .pc_counters = undefined,
+    .n_runs = 0,
     .deduplicated_runs = 0,
     .recent_cases = .{},
     .coverage = undefined,
+    .dir = undefined,
+    .seen_pcs = undefined,
 };
 
 export fn fuzzer_next() Fuzzer.Slice {
@@ -315,3 +409,64 @@
         error.OutOfMemory => @panic("out of memory"),
     });
 }
+
+export fn fuzzer_init() void {
+    if (module_count_8bc == 0) fatal("__sanitizer_cov_8bit_counters_init was never called", .{});
+    if (module_count_pcs == 0) fatal("__sanitizer_cov_pcs_init was never called", .{});
+
+    // TODO: move this to .zig-cache/f
+    const fuzz_dir = std.fs.cwd().makeOpenPath("f", .{ .iterate = true }) catch |err| {
+        fatal("unable to open fuzz directory 'f': {s}", .{@errorName(err)});
+    };
+    fuzzer.init(fuzz_dir) catch |err| fatal("unable to init fuzzer: {s}", .{@errorName(err)});
+}
+
+/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
+pub const MemoryMappedList = struct {
+    /// Contents of the list.
+    ///
+    /// The capacity is fixed when the mapping is created, so pointers to
+    /// elements in this slice are never invalidated; the backing memory is
+    /// shared with any other process that maps the same file.
+    items: []align(std.mem.page_size) volatile u8,
+    /// How many bytes this list can hold without remapping.
+    capacity: usize,
+
+    pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
+        const ptr = try std.posix.mmap(
+            null,
+            capacity,
+            std.posix.PROT.READ | std.posix.PROT.WRITE,
+            .{ .TYPE = .SHARED },
+            file.handle,
+            0,
+        );
+        return .{
+            .items = ptr[0..length],
+            .capacity = capacity,
+        };
+    }
+
+    /// Append the slice of items to the list.
+    /// Asserts that the list can hold the additional items.
+    pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
+        const old_len = l.items.len;
+        const new_len = old_len + items.len;
+        assert(new_len <= l.capacity);
+        l.items.len = new_len;
+        @memcpy(l.items[old_len..][0..items.len], items);
+    }
+
+    /// Append a value to the list `n` times.
+    /// Never invalidates element pointers.
+    /// The function is inline so that a comptime-known `value` parameter will
+    /// have better memset codegen in case it has a repeated byte pattern.
+    /// Asserts that the list can hold the additional items.
+    pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
+        const new_len = l.items.len + n;
+        assert(new_len <= l.capacity);
+        @memset(l.items.ptr[l.items.len..new_len], value);
+        l.items.len = new_len;
+    }
+};
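
Note (editor's sketch, not part of the patch): the coverage file written by
Fuzzer.init above has a fixed layout: a SeenPcsHeader, then `pcs_len` PC
addresses, then one hit bit per PC. Under that assumption, another process
could map the same file read-only and report progress while the fuzzer runs.
The path "f/coverage" follows the default directory created in fuzzer_init;
the reader program itself is hypothetical. Its reads race benignly with the
fuzzer, since the hit bits and run counter only ever grow.

    const std = @import("std");

    const SeenPcsHeader = extern struct {
        n_runs: usize,
        pcs_len: usize,
        lowest_stack: usize,
    };

    pub fn main() !void {
        const file = try std.fs.cwd().openFile("f/coverage", .{});
        defer file.close();
        const len: usize = @intCast(try file.getEndPos());

        // Map shared so that updates from the running fuzzer stay visible.
        const bytes = try std.posix.mmap(
            null,
            len,
            std.posix.PROT.READ,
            .{ .TYPE = .SHARED },
            file.handle,
            0,
        );
        defer std.posix.munmap(bytes);

        const header: *const SeenPcsHeader = @ptrCast(bytes[0..@sizeOf(SeenPcsHeader)]);
        const pcs_bytes = bytes[@sizeOf(SeenPcsHeader)..][0 .. header.pcs_len * @sizeOf(usize)];
        const bits = bytes[@sizeOf(SeenPcsHeader) + pcs_bytes.len ..];

        // Count PCs whose hit bit has been set by any run so far.
        var seen: usize = 0;
        for (0..header.pcs_len) |i| {
            if (bits[i / 8] & (@as(u8, 1) << @intCast(i % 8)) != 0) seen += 1;
        }
        std.debug.print("runs={d} coverage={d}/{d} lowest_stack=0x{x}\n", .{
            header.n_runs, seen, header.pcs_len, header.lowest_stack,
        });
    }

Because both sides map with MAP.SHARED, the fuzzer's atomic `.Or` stores into
the bit set and its header updates become visible to such a reader without
further coordination; a dashboard could simply re-scan the mapping on an
interval.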