diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index d4aaf86813..9171129427 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -57,9 +57,6 @@ fn bitsetUsizes(elems: usize) usize { const Executable = struct { /// Tracks the hit count for each pc as updated by the process's instrumentation. pc_counters: []u8, - /// Read-only memory section containing compiled-in constants found from parsing the executable - rodata_addr: usize, - rodata_size: usize, cache_f: std.fs.Dir, /// Shared copy of all pcs that have been hit stored in a memory-mapped file that can viewed @@ -72,80 +69,12 @@ const Executable = struct { /// Used before this structure is initialized to avoid illegal behavior /// from instrumentation functions being called and using undefined values. pub const preinit: Executable = .{ - .rodata_addr = 0, - .rodata_size = 0, .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section .cache_f = undefined, .shared_seen_pcs = undefined, .pc_digest = undefined, }; - /// Even on error, this initializes rodata_addr and rodata_size to valid values - fn initRodata(self: *Executable) !void { - errdefer { - self.rodata_addr = 0; - self.rodata_size = 0; - } - - const exec_path = std.fs.selfExePathAlloc(gpa) catch |e| - if (e == error.OutOfMemory) @panic("OOM") else return e; - defer gpa.free(exec_path); - const exec_file = try std.fs.cwd().openFile(exec_path, .{}); - defer exec_file.close(); - - switch (builtin.object_format) { - .elf => { - // We use two reader instances since the data they respectively read are - // not next to each other in the file. - // - // Multiple instances is safe since Elf.SectionHeaderIterator always calls - // seekTo (which we also use to arbitrarily set the index) and we always - // call seekTo to arbitrarily read from the string table. - var r_buf: [4096]u8 = undefined; - var r = exec_file.reader(&r_buf); - var str_r_buf: [4096]u8 = undefined; - var str_r = exec_file.reader(&str_r_buf); - - const ehdr: std.elf.Header = try .read(&r.interface); - if (ehdr.shstrndx == 0) return error.NoElfStringTable; - var shdr_it = ehdr.iterateSectionHeaders(&r); - - shdr_it.index = ehdr.shstrndx; - const str_tab_shdr = try shdr_it.next() orelse return error.InvalidElfSection; - const str_tab_off = str_tab_shdr.sh_offset; - - shdr_it.index = 0; - while (try shdr_it.next()) |shdr| { - const flags: packed struct { - write: bool, - alloc: bool, - execinstr: bool, - } = @bitCast(@as(u3, @truncate(shdr.sh_flags))); - if (shdr.sh_addr == 0 or shdr.sh_size == 0 or flags != @TypeOf(flags){ - .alloc = true, - .write = false, - .execinstr = false, - }) continue; - - const rodata_name = ".rodata\x00"; - try str_r.seekTo(try math.add(u64, str_tab_off, shdr.sh_name)); - const section_name = str_r.interface.take(rodata_name.len) catch return r.err.?; - if (!std.mem.eql(u8, section_name, rodata_name)) - continue; - - const addr = math.cast(usize, shdr.sh_addr) orelse return error.Overflow; - const size = math.cast(usize, shdr.sh_size) orelse return error.Overflow; - _ = try math.add(usize, addr, size); // make sure there is no overflow - self.rodata_addr = addr; - self.rodata_size = size; - return; - } - return error.NoRodataSection; - }, - else => return error.UnsupportedObjectFormat, - } - } - fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList { const pc_bitset_usizes = bitsetUsizes(pcs.len); const coverage_file_name = std.fmt.hex(pc_digest); @@ -284,11 +213,6 @@ const Executable = struct { .{ self.pc_counters.len, pcs.len }, ); - self.initRodata() catch |e| if (e != error.UnsupportedObjectFormat) std.log.err( - \\failed to enumerate read-only memory: {t} - \\efficiency will be severly reduced - , .{e}); - self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs)); self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest); @@ -322,33 +246,22 @@ const Executable = struct { }; }; -/// Data gathered from instrumentation functions -/// Seperate from Executable since its state is resetable and changes -/// Seperate from Fuzzer since it may be needed before fuzzing starts +/// Data gathered from instrumentation functions. +/// Seperate from Executable since its state is resetable and changes. +/// Seperate from Fuzzer since it may be needed before fuzzing starts. const Instrumentation = struct { /// Bitset of seen pcs across all runs excluding fresh pcs. /// This is seperate then shared_seen_pcs because multiple fuzzing processes are likely using /// it which causes contention and unrelated pcs to our campaign being set. seen_pcs: []usize, - /// Bitset of seen rodata bytes read across all runs - seen_rodata_loads: []usize, - - /// Bitset of run's read bytes that weren't present in seen_loads - /// Elements are always zero if !any_new_data_loads - new_rodata_loads: []usize, - any_new_rodata_loads: bool, /// Stores a fresh input's new pcs fresh_pcs: []usize, - /// Stores a fresh input's new reads - /// Elements are always zero if !any_fresh_rodata_loads - fresh_rodata_loads: []usize, - any_fresh_rodata_loads: bool, /// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx /// have been called from and have had their already been added to const_x_vals const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty, - /// Values that have been constant operands in comparisons, switch cases, or memory reads + /// Values that have been constant operands in comparisons and switch cases. /// There may be duplicates in this array if they came from different addresses, which is /// fine as they are likely more important and hence more likely to be selected. const_vals2: std.ArrayListUnmanaged(u16) = .empty, @@ -361,12 +274,7 @@ const Instrumentation = struct { /// from instrumentation functions being called and using undefined values. pub const preinit: Instrumentation = .{ .seen_pcs = undefined, // currently only updated by `Fuzzer` - .seen_rodata_loads = undefined, - .new_rodata_loads = undefined, - .any_new_rodata_loads = undefined, .fresh_pcs = undefined, - .fresh_rodata_loads = undefined, - .any_fresh_rodata_loads = undefined, }; pub fn depreinit(self: *Instrumentation) void { @@ -379,20 +287,14 @@ const Instrumentation = struct { pub fn init() Instrumentation { const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len); - const rodata_bitset_usizes = bitsetUsizes(exec.rodata_size); - const alloc_usizes = pc_bitset_usizes * 2 + rodata_bitset_usizes * 3; + const alloc_usizes = pc_bitset_usizes * 2; const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM"); var fba_ctx: std.heap.FixedBufferAllocator = .init(buf); const fba = fba_ctx.allocator(); var self: Instrumentation = .{ .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable, - .seen_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable, - .new_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable, - .any_new_rodata_loads = undefined, .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable, - .fresh_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable, - .any_fresh_rodata_loads = undefined, }; self.reset(); return self; @@ -400,12 +302,7 @@ const Instrumentation = struct { pub fn reset(self: *Instrumentation) void { @memset(self.seen_pcs, 0); - @memset(self.seen_rodata_loads, 0); - @memset(self.new_rodata_loads, 0); - self.any_new_rodata_loads = false; @memset(self.fresh_pcs, 0); - @memset(self.fresh_rodata_loads, 0); - self.any_fresh_rodata_loads = false; self.const_pcs.clearRetainingCapacity(); self.const_vals2.clearRetainingCapacity(); self.const_vals4.clearRetainingCapacity(); @@ -418,16 +315,7 @@ const Instrumentation = struct { return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing; } - pub fn clearNewRodataLoads(self: *Instrumentation) void { - if (self.any_new_rodata_loads) { - @memset(self.new_rodata_loads, 0); - self.any_new_rodata_loads = false; - } - } - pub fn isFresh(self: *Instrumentation) bool { - if (self.any_new_rodata_loads) return true; - var hit_pcs = exec.pcBitsetIterator(); for (self.seen_pcs) |seen_pcs| { if (hit_pcs.next() & ~seen_pcs != 0) return true; @@ -436,38 +324,24 @@ const Instrumentation = struct { return false; } - /// Updates fresh_pcs and fresh_rodata_loads - /// any_new_rodata_loads and elements of new_rodata_loads are unspecified - /// afterwards, but still valid. + /// Updates `fresh_pcs` pub fn setFresh(self: *Instrumentation) void { var hit_pcs = exec.pcBitsetIterator(); for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| { fresh_pcs.* = hit_pcs.next() & ~seen_pcs; } - - mem.swap([]usize, &self.fresh_rodata_loads, &self.new_rodata_loads); - mem.swap(bool, &self.any_fresh_rodata_loads, &self.any_new_rodata_loads); } - /// Returns if exec.pc_counters and new_rodata_loads are the same or a superset of fresh_pcs and - /// fresh_rodata_loads respectively. + /// Returns if `exec.pc_counters` is a superset of `fresh_pcs`. pub fn atleastFresh(self: *Instrumentation) bool { var hit_pcs = exec.pcBitsetIterator(); for (self.fresh_pcs) |fresh_pcs| { if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false; } - - if (self.any_fresh_rodata_loads) { - if (!self.any_new_rodata_loads) return false; - for (self.new_rodata_loads, self.fresh_rodata_loads) |n, f| { - if (n & f != f) return false; - } - } - return true; } - /// Updates based off fresh_pcs and fresh_rodata_loads + /// Updates based off `fresh_pcs` fn updateSeen(self: *Instrumentation) void { comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize); const shared_seen_pcs: [*]volatile usize = @ptrCast( @@ -479,11 +353,6 @@ const Instrumentation = struct { if (fresh != 0) _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic); } - - if (self.any_fresh_rodata_loads) { - for (self.seen_rodata_loads, self.fresh_rodata_loads) |*s, f| - s.* |= f; - } } }; @@ -496,8 +365,8 @@ const Fuzzer = struct { /// input. input: MemoryMappedList, - /// Minimized past inputs leading to new pcs or rodata reads. These are randomly mutated in - /// round-robin fashion + /// Minimized past inputs leading to new pc hits. + /// These are randomly mutated in round-robin fashion /// Element zero is always an empty input. It is gauraunteed no other elements are empty. corpus: std.ArrayListUnmanaged([]const u8), corpus_pos: usize, @@ -596,10 +465,9 @@ const Fuzzer = struct { self.run(); inst.setFresh(); inst.updateSeen(); - inst.clearNewRodataLoads(); } - /// Assumes fresh_pcs and fresh_rodata_loads correspond to the input + /// Assumes `fresh_pcs` correspond to the input fn minimizeInput(self: *Fuzzer) void { // The minimization technique is kept relatively simple, we sequentially try to remove each // byte and check that the new pcs and memory loads are still hit. @@ -609,7 +477,6 @@ const Fuzzer = struct { const old = self.input.orderedRemove(i); @memset(exec.pc_counters, 0); - inst.clearNewRodataLoads(); self.run(); if (!inst.atleastFresh()) { @@ -623,11 +490,7 @@ const Fuzzer = struct { } fn run(self: *Fuzzer) void { - // We don't need to clear pc_counters here; all we care about is new hits and not already - // seen hits. Ideally, we wouldn't even have these counters and do something similiar to - // what we do for tracking memory (i.e. a __sanitizer_cov function that updates a flag on a - // new hit.) - assert(!inst.any_new_rodata_loads); + // `pc_counters` is not cleared since only new hits are relevant. mem.bytesAsValue(usize, self.input.items[0..8]).* = mem.nativeToLittle(usize, self.input.items.len - 8); @@ -673,7 +536,6 @@ const Fuzzer = struct { inst.setFresh(); self.minimizeInput(); inst.updateSeen(); - inst.clearNewRodataLoads(); // An empty-input has always been tried, so if an empty input is fresh then the // test has to be non-deterministic. This has to be checked as duplicate empty @@ -796,58 +658,6 @@ export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void { } } -fn genericLoad(T: anytype, ptr: *align(1) const T, comptime opt_const_vals_field: ?[]const u8) void { - const addr = @intFromPtr(ptr); - const off = addr -% exec.rodata_addr; - if (off >= exec.rodata_size) { - @branchHint(.likely); - return; - } - - const i = off / @bitSizeOf(usize); - // Bits are intentionally truncated since the pointer will almost always be aligned - const hit = (@as(usize, (1 << @sizeOf(T)) - 1)) << @intCast(off % @bitSizeOf(usize)); - const new = hit & ~inst.seen_rodata_loads[i]; - if (new == 0) { - @branchHint(.likely); - return; - } - - inst.new_rodata_loads[i] |= new; - inst.any_new_rodata_loads = true; - - if (opt_const_vals_field) |const_vals_field| { - // This may have already been hit and this run is just being used for evaluating the - // input, in which case we do not want to readd the same value. - if (inst.any_fresh_rodata_loads) { - @branchHint(.unlikely); - if (new & ~inst.fresh_rodata_loads[i] == 0) - return; - } - @field(inst, const_vals_field).append(gpa, ptr.*) catch @panic("OOM"); - } -} - -export fn __sanitizer_cov_load1(ptr: *align(1) const u8) void { - genericLoad(u8, ptr, null); -} - -export fn __sanitizer_cov_load2(ptr: *align(1) const u16) void { - genericLoad(u16, ptr, "const_vals2"); -} - -export fn __sanitizer_cov_load4(ptr: *align(1) const u32) void { - genericLoad(u32, ptr, "const_vals4"); -} - -export fn __sanitizer_cov_load8(ptr: *align(1) const u64) void { - genericLoad(u64, ptr, "const_vals8"); -} - -export fn __sanitizer_cov_load16(ptr: *align(1) const u128) void { - genericLoad(u128, ptr, "const_vals16"); -} - export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void { _ = arg1; _ = arg2; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 9715ac876a..9435087edc 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1115,7 +1115,7 @@ pub const Object = struct { .NoPrune = false, // Workaround for https://github.com/llvm/llvm-project/pull/106464 .StackDepth = true, - .TraceLoads = options.fuzz, + .TraceLoads = false, .TraceStores = false, .CollectControlFlow = false, },