diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
index e618f72d2f..2a7eeaeb72 100644
--- a/lib/compiler/test_runner.zig
+++ b/lib/compiler/test_runner.zig
@@ -4,6 +4,7 @@ const builtin = @import("builtin");
 const std = @import("std");
 const testing = std.testing;
 const assert = std.debug.assert;
+const fuzz_abi = std.Build.abi.fuzz;
 
 pub const std_options: std.Options = .{
     .logFn = log,
@@ -57,7 +58,7 @@ pub fn main() void {
     fba.reset();
     if (builtin.fuzz) {
         const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument");
-        fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
+        fuzz_abi.fuzzer_init(.fromSlice(cache_dir));
     }
 
     if (listen) {
@@ -78,7 +79,7 @@ fn mainServer() !void {
     });
 
     if (builtin.fuzz) {
-        const coverage_id = fuzzer_coverage_id();
+        const coverage_id = fuzz_abi.fuzzer_coverage_id();
         try server.serveU64Message(.coverage_id, coverage_id);
     }
 
@@ -152,14 +153,19 @@ fn mainServer() !void {
                 });
             },
             .start_fuzzing => {
+                // This ensures the code below is not analyzed, and hence does not reference
+                // fuzzer symbols, when they are not present.
                 if (!builtin.fuzz) unreachable;
+
                 const index = try server.receiveBody_u32();
                 const test_fn = builtin.test_functions[index];
                 const entry_addr = @intFromPtr(test_fn.func);
+
                 try server.serveU64Message(.fuzz_start_addr, entry_addr);
                 defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
                 is_fuzz_test = false;
-                fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
+                fuzz_test_index = index;
+
                 test_fn.func() catch |err| switch (err) {
                     error.SkipZigTest => return,
                     else => {
@@ -184,6 +190,8 @@ fn mainServer() !void {
 
 fn mainTerminal() void {
     @disableInstrumentation();
+    if (builtin.fuzz) @panic("fuzz test requires server");
+
     const test_fn_list = builtin.test_functions;
     var ok_count: usize = 0;
     var skip_count: usize = 0;
@@ -333,28 +341,8 @@ pub fn mainSimple() anyerror!void {
     if (failed != 0) std.process.exit(1);
 }
 
-const FuzzerSlice = extern struct {
-    ptr: [*]const u8,
-    len: usize,
-
-    /// Inline to avoid fuzzer instrumentation.
-    inline fn toSlice(s: FuzzerSlice) []const u8 {
-        return s.ptr[0..s.len];
-    }
-
-    /// Inline to avoid fuzzer instrumentation.
- inline fn fromSlice(s: []const u8) FuzzerSlice { - return .{ .ptr = s.ptr, .len = s.len }; - } -}; - var is_fuzz_test: bool = undefined; - -extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void; -extern fn fuzzer_init(cache_dir: FuzzerSlice) void; -extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void; -extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.c) void) void; -extern fn fuzzer_coverage_id() u64; +var fuzz_test_index: u32 = undefined; pub fn fuzz( context: anytype, @@ -385,12 +373,12 @@ pub fn fuzz( const global = struct { var ctx: @TypeOf(context) = undefined; - fn fuzzer_one(input_ptr: [*]const u8, input_len: usize) callconv(.c) void { + fn test_one(input: fuzz_abi.Slice) callconv(.c) void { @disableInstrumentation(); testing.allocator_instance = .{}; defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1); log_err_count = 0; - testOne(ctx, input_ptr[0..input_len]) catch |err| switch (err) { + testOne(ctx, input.toSlice()) catch |err| switch (err) { error.SkipZigTest => return, else => { std.debug.lockStdErr(); @@ -411,10 +399,11 @@ pub fn fuzz( testing.allocator_instance = .{}; defer testing.allocator_instance = prev_allocator_state; - for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len); - global.ctx = context; - fuzzer_start(&global.fuzzer_one); + fuzz_abi.fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name)); + for (options.corpus) |elem| + fuzz_abi.fuzzer_new_input(.fromSlice(elem)); + fuzz_abi.fuzzer_main(); return; } diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index ccedd363f6..9171129427 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -1,531 +1,1274 @@ const builtin = @import("builtin"); const std = @import("std"); -const Allocator = std.mem.Allocator; +const mem = std.mem; +const math = std.math; +const Allocator = mem.Allocator; const assert = std.debug.assert; -const fatal = std.process.fatal; -const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader; +const panic = std.debug.panic; +const abi = std.Build.abi.fuzz; +const native_endian = builtin.cpu.arch.endian(); pub const std_options = std.Options{ .logFn = logOverride, }; -var log_file_buffer: [256]u8 = undefined; -var log_file_writer: ?std.fs.File.Writer = null; - fn logOverride( comptime level: std.log.Level, comptime scope: @Type(.enum_literal), comptime format: []const u8, args: anytype, ) void { - const fw = if (log_file_writer) |*f| f else f: { - const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch - @panic("failed to open fuzzer log file"); - log_file_writer = f.writer(&log_file_buffer); - break :f &log_file_writer.?; - }; + const f = log_f orelse + panic("attempt to use log before initialization, message:\n" ++ format, args); + f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e}); + defer f.unlock(); + + var buf: [256]u8 = undefined; + var fw = f.writer(&buf); + const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e}); + fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e}); + const prefix1 = comptime level.asText(); const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; - fw.interface.print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch - @panic("failed to write to fuzzer log"); - fw.interface.flush() catch @panic("failed to flush fuzzer log"); + fw.interface.print( + "[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n", + 
.{current_test_name orelse "setup"} ++ args, + ) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?}); + fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?}); } -/// Helps determine run uniqueness in the face of recursion. -export threadlocal var __sancov_lowest_stack: usize = 0; - -export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void { - handleCmp(@returnAddress(), arg1, arg2); -} - -export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void { - const pc = @returnAddress(); - const len = cases_ptr[0]; - const val_size_in_bits = cases_ptr[1]; - const cases = cases_ptr[2..][0..len]; - fuzzer.traceValue(pc ^ val); - _ = val_size_in_bits; - _ = cases; - //std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{ - // pc, val, val_size_in_bits, cases.len, - //}); -} - -export fn __sanitizer_cov_trace_pc_indir(callee: usize) void { - // Not valuable because we already have pc tracing via 8bit counters. - _ = callee; - //const pc = @returnAddress(); - //fuzzer.traceValue(pc ^ callee); - //std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee }); -} -export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void { - // clang will emit a call to this function when compiling with code coverage instrumentation. - // however fuzzer_init() does not need this information, since it directly reads from the symbol table. - _ = start; - _ = end; -} -export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void { - // clang will emit a call to this function when compiling with code coverage instrumentation. - // however fuzzer_init() does not need this information, since it directly reads from the symbol table. - _ = start; - _ = end; -} - -fn handleCmp(pc: usize, arg1: u64, arg2: u64) void { - fuzzer.traceValue(pc ^ arg1 ^ arg2); - //std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 }); -} - -const Fuzzer = struct { - rng: std.Random.DefaultPrng, - pcs: []const usize, - pc_counters: []u8, - n_runs: usize, - traced_comparisons: std.AutoArrayHashMapUnmanaged(usize, void), - /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process. - /// Stored in a memory-mapped file so that it can be shared with other - /// processes and viewed while the fuzzer is running. - seen_pcs: MemoryMappedList, - cache_dir: std.fs.Dir, - /// Identifies the file name that will be used to store coverage - /// information, available to other processes. - coverage_id: u64, - unit_test_name: []const u8, - - /// The index corresponds to the file name within the f/ subdirectory. - /// The string is the input. - /// This data is read-only; it caches what is on the filesystem. 
- corpus: std.ArrayListUnmanaged(Input), - corpus_directory: std.Build.Cache.Directory, - - /// The next input that will be given to the testOne function. When the - /// current process crashes, this memory-mapped file is used to recover the - /// input. - /// - /// The file size corresponds to the capacity. The length is not stored - /// and that is the next thing to work on! - input: MemoryMappedList, - - const Input = struct { - bytes: []u8, - last_traced_comparison: usize, - }; - - const Slice = extern struct { - ptr: [*]const u8, - len: usize, - - fn toZig(s: Slice) []const u8 { - return s.ptr[0..s.len]; - } - - fn fromZig(s: []const u8) Slice { - return .{ - .ptr = s.ptr, - .len = s.len, - }; - } - }; - - fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void { - f.cache_dir = cache_dir; - f.pc_counters = pc_counters; - f.pcs = pcs; - - // Choose a file name for the coverage based on a hash of the PCs that will be stored within. - const pc_digest = std.hash.Wyhash.hash(0, std.mem.sliceAsBytes(pcs)); - f.coverage_id = pc_digest; - const hex_digest = std.fmt.hex(pc_digest); - const coverage_file_path = "v/" ++ hex_digest; - - // Layout of this file: - // - Header - // - list of PC addresses (usize elements) - // - list of hit flag, 1 bit per address (stored in u8 elements) - const coverage_file = createFileBail(cache_dir, coverage_file_path, .{ - .read = true, - .truncate = false, - }); - const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize); - comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); - comptime assert(SeenPcsHeader.trailing[1] == .pc_addr); - const bytes_len = @sizeOf(SeenPcsHeader) + - n_bitset_elems * @sizeOf(usize) + - pcs.len * @sizeOf(usize); - const existing_len = coverage_file.getEndPos() catch |err| { - fatal("unable to check len of coverage file: {s}", .{@errorName(err)}); - }; - if (existing_len == 0) { - coverage_file.setEndPos(bytes_len) catch |err| { - fatal("unable to set len of coverage file: {s}", .{@errorName(err)}); - }; - } else if (existing_len != bytes_len) { - fatal("incompatible existing coverage file (differing lengths)", .{}); - } - f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| { - fatal("unable to init coverage memory map: {s}", .{@errorName(err)}); - }; - if (existing_len != 0) { - const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)]; - const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes); - for (existing_pcs, pcs, 0..) |old, new, i| { - if (old != new) { - fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{ - i, old, new, - }); - } - } - } else { - const header: SeenPcsHeader = .{ - .n_runs = 0, - .unique_runs = 0, - .pcs_len = pcs.len, - }; - f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header)); - f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems * @sizeOf(usize)); - f.seen_pcs.appendSliceAssumeCapacity(std.mem.sliceAsBytes(pcs)); - } - } - - fn initNextInput(f: *Fuzzer) void { - while (true) { - const i = f.corpus.items.len; - var buf: [30]u8 = undefined; - const input_sub_path = std.fmt.bufPrint(&buf, "{d}", .{i}) catch unreachable; - const input = f.corpus_directory.handle.readFileAlloc(input_sub_path, gpa, .limited(1 << 31)) catch |err| switch (err) { - error.FileNotFound => { - // Make this one the next input. 
- const input_file = f.corpus_directory.handle.createFile(input_sub_path, .{ - .exclusive = true, - .truncate = false, - .read = true, - }) catch |e| switch (e) { - error.PathAlreadyExists => continue, - else => fatal("unable to create '{f}{d}: {s}", .{ f.corpus_directory, i, @errorName(err) }), - }; - errdefer input_file.close(); - // Initialize the mmap for the current input. - f.input = MemoryMappedList.create(input_file, 0, std.heap.page_size_max) catch |e| { - fatal("unable to init memory map for input at '{f}{d}': {s}", .{ - f.corpus_directory, i, @errorName(e), - }); - }; - break; - }, - else => fatal("unable to read '{f}{d}': {s}", .{ f.corpus_directory, i, @errorName(err) }), - }; - errdefer gpa.free(input); - f.corpus.append(gpa, .{ - .bytes = input, - .last_traced_comparison = 0, - }) catch |err| oom(err); - } - } - - fn addCorpusElem(f: *Fuzzer, input: []const u8) !void { - try f.corpus.append(gpa, .{ - .bytes = try gpa.dupe(u8, input), - .last_traced_comparison = 0, - }); - } - - fn start(f: *Fuzzer) !void { - const rng = fuzzer.rng.random(); - - // Grab the corpus which is namespaced based on `unit_test_name`. - { - if (f.unit_test_name.len == 0) fatal("test runner never set unit test name", .{}); - const sub_path = try std.fmt.allocPrint(gpa, "f/{s}", .{f.unit_test_name}); - f.corpus_directory = .{ - .handle = f.cache_dir.makeOpenPath(sub_path, .{}) catch |err| - fatal("unable to open corpus directory 'f/{s}': {t}", .{ sub_path, err }), - .path = sub_path, - }; - initNextInput(f); - } - - assert(f.n_runs == 0); - - // If the corpus is empty, synthesize one input. - if (f.corpus.items.len == 0) { - const len = rng.uintLessThanBiased(usize, 200); - const slice = try gpa.alloc(u8, len); - rng.bytes(slice); - f.input.appendSliceAssumeCapacity(slice); - try f.corpus.append(gpa, .{ - .bytes = slice, - .last_traced_comparison = 0, - }); - runOne(f, 0); - } - - while (true) { - const chosen_index = rng.uintLessThanBiased(usize, f.corpus.items.len); - const modification = rng.enumValue(Mutation); - f.mutateAndRunOne(chosen_index, modification); - } - } - - /// `x` represents a possible branch. It is the PC address of the possible - /// branch site, hashed together with the value(s) used that determine to - /// where it branches. 
- fn traceValue(f: *Fuzzer, x: usize) void { - errdefer |err| oom(err); - try f.traced_comparisons.put(gpa, x, {}); - } - - const Mutation = enum { - remove_byte, - modify_byte, - add_byte, - }; - - fn mutateAndRunOne(f: *Fuzzer, corpus_index: usize, mutation: Mutation) void { - const rng = fuzzer.rng.random(); - f.input.clearRetainingCapacity(); - const old_input = f.corpus.items[corpus_index].bytes; - f.input.ensureTotalCapacity(old_input.len + 1) catch @panic("mmap file resize failed"); - switch (mutation) { - .remove_byte => { - const omitted_index = rng.uintLessThanBiased(usize, old_input.len); - f.input.appendSliceAssumeCapacity(old_input[0..omitted_index]); - f.input.appendSliceAssumeCapacity(old_input[omitted_index + 1 ..]); - }, - .modify_byte => { - const modified_index = rng.uintLessThanBiased(usize, old_input.len); - f.input.appendSliceAssumeCapacity(old_input); - f.input.items[modified_index] = rng.int(u8); - }, - .add_byte => { - const modified_index = rng.uintLessThanBiased(usize, old_input.len); - f.input.appendSliceAssumeCapacity(old_input[0..modified_index]); - f.input.appendAssumeCapacity(rng.int(u8)); - f.input.appendSliceAssumeCapacity(old_input[modified_index..]); - }, - } - runOne(f, corpus_index); - } - - fn runOne(f: *Fuzzer, corpus_index: usize) void { - const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]); - - f.traced_comparisons.clearRetainingCapacity(); - @memset(f.pc_counters, 0); - __sancov_lowest_stack = std.math.maxInt(usize); - - fuzzer_one(@volatileCast(f.input.items.ptr), f.input.items.len); - - f.n_runs += 1; - _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); - - // Track code coverage from all runs. - comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize); - const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]); - const remainder = f.pcs.len % @bitSizeOf(usize); - const aligned_len = f.pcs.len - remainder; - const seen_pcs = header_end_ptr[0..aligned_len]; - const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]); - const V = @Vector(@bitSizeOf(usize), u8); - const zero_v: V = @splat(0); - var fresh = false; - var superset = true; - - for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| { - const v: V = array.*; - const mask: usize = @bitCast(v != zero_v); - const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic); - fresh = fresh or (prev | mask) != prev; - superset = superset and (prev | mask) != mask; - } - if (remainder > 0) { - const i = pc_counters.len; - const elem = &seen_pcs[i]; - var mask: usize = 0; - for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| { - mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index); - } - const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic); - fresh = fresh or (prev | mask) != prev; - superset = superset and (prev | mask) != mask; - } - - // First check if this is a better version of an already existing - // input, replacing that input. 
-        if (superset or f.traced_comparisons.entries.len >= f.corpus.items[corpus_index].last_traced_comparison) {
-            const new_input = gpa.realloc(f.corpus.items[corpus_index].bytes, f.input.items.len) catch |err| oom(err);
-            f.corpus.items[corpus_index] = .{
-                .bytes = new_input,
-                .last_traced_comparison = f.traced_comparisons.count(),
-            };
-            @memcpy(new_input, @volatileCast(f.input.items));
-            _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
-            return;
-        }
-
-        if (!fresh) return;
-
-        // Input is already committed to the file system, we just need to open a new file
-        // for the next input.
-        // Pre-add it to the corpus list so that it does not get redundantly picked up.
-        f.corpus.append(gpa, .{
-            .bytes = gpa.dupe(u8, @volatileCast(f.input.items)) catch |err| oom(err),
-            .last_traced_comparison = f.traced_comparisons.entries.len,
-        }) catch |err| oom(err);
-        f.input.deinit();
-        initNextInput(f);
-
-        // TODO: also mark input as "hot" so it gets prioritized for checking mutations above others.
-
-        _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
-    }
-};
-
-fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File {
-    return dir.createFile(sub_path, flags) catch |err| switch (err) {
-        error.FileNotFound => {
-            const dir_name = std.fs.path.dirname(sub_path).?;
-            dir.makePath(dir_name) catch |e| {
-                fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) });
-            };
-            return dir.createFile(sub_path, flags) catch |e| {
-                fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) });
-            };
-        },
-        else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }),
-    };
-}
-
-fn oom(err: anytype) noreturn {
-    switch (err) {
-        error.OutOfMemory => @panic("out of memory"),
-    }
-}
-
-var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
-
+var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
 const gpa = switch (builtin.mode) {
     .Debug => debug_allocator.allocator(),
     .ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
 };
 
-var fuzzer: Fuzzer = .{
-    .rng = std.Random.DefaultPrng.init(0),
-    .input = undefined,
-    .pcs = undefined,
-    .pc_counters = undefined,
-    .n_runs = 0,
-    .cache_dir = undefined,
-    .seen_pcs = undefined,
-    .coverage_id = undefined,
-    .unit_test_name = &.{},
-    .corpus = .empty,
-    .corpus_directory = undefined,
-    .traced_comparisons = .empty,
+/// Part of `exec`, however kept separate so that it can be set before `exec` is.
+var log_f: ?std.fs.File = null;
+var exec: Executable = .preinit;
+var inst: Instrumentation = .preinit;
+var fuzzer: Fuzzer = undefined;
+var current_test_name: ?[]const u8 = null;
+
+fn bitsetUsizes(elems: usize) usize {
+    return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
+}
+
+const Executable = struct {
+    /// Tracks the hit count for each pc as updated by the process's instrumentation.
+    pc_counters: []u8,
+
+    cache_f: std.fs.Dir,
+    /// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be
+    /// viewed while the fuzzer is running.
+    shared_seen_pcs: MemoryMappedList,
+    /// Hash of pcs used to uniquely identify the shared coverage file
+    pc_digest: u64,
+
+    /// A minimal state for this struct which instrumentation can function on.
+    /// Used before this structure is initialized to avoid illegal behavior
+    /// from instrumentation functions being called and using undefined values.
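+    /// (Instrumentation callbacks can fire before `fuzzer_init` runs, e.g. from process
+    /// startup code, which is why `exec` defaults to this value rather than `undefined`.)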
+ pub const preinit: Executable = .{ + .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section + .cache_f = undefined, + .shared_seen_pcs = undefined, + .pc_digest = undefined, + }; + + fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList { + const pc_bitset_usizes = bitsetUsizes(pcs.len); + const coverage_file_name = std.fmt.hex(pc_digest); + comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize); + comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr); + + var v = cache_dir.makeOpenPath("v", .{}) catch |e| + panic("failed to create directory 'v': {t}", .{e}); + defer v.close(); + const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{ + .read = true, + // If we create the file, we want to block other processes while we populate it + .lock = .exclusive, + .exclusive = true, + })) |f| + .{ f, true } + else |e| switch (e) { + error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{ + .mode = .read_write, + .lock = .shared, + }) catch |e2| panic( + "failed to open existing coverage file '{s}': {t}", + .{ &coverage_file_name, e2 }, + ), false }, + else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }), + }; + + const coverage_file_len = @sizeOf(abi.SeenPcsHeader) + + pc_bitset_usizes * @sizeOf(usize) + + pcs.len * @sizeOf(usize); + if (populate) { + defer coverage_file.lock(.shared) catch |e| panic( + "failed to demote lock for coverage file '{s}': {t}", + .{ &coverage_file_name, e }, + ); + var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic( + "failed to init memory map for coverage file '{s}': {t}", + .{ &coverage_file_name, e }, + ); + map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{ + .n_runs = 0, + .unique_runs = 0, + .pcs_len = pcs.len, + })); + map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize)); + map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs)); + return map; + } else { + const size = coverage_file.getEndPos() catch |e| panic( + "failed to stat coverage file '{s}': {t}", + .{ &coverage_file_name, e }, + ); + if (size != coverage_file_len) panic( + "incompatible existing coverage file '{s}' (differing lengths: {} != {})", + .{ &coverage_file_name, size, coverage_file_len }, + ); + + const map = MemoryMappedList.init( + coverage_file, + coverage_file_len, + coverage_file_len, + ) catch |e| panic( + "failed to init memory map for coverage file '{s}': {t}", + .{ &coverage_file_name, e }, + ); + + const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items)); + if (seen_pcs_header.pcs_len != pcs.len) panic( + "incompatible existing coverage file '{s}' (differing pcs length: {} != {})", + .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len }, + ); + if (mem.indexOfDiff(usize, seen_pcs_header.pcAddrs(), pcs)) |i| panic( + "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})", + .{ &coverage_file_name, i, seen_pcs_header.pcAddrs()[i], pcs[i] }, + ); + + return map; + } + } + + pub fn init(cache_dir_path: []const u8) Executable { + var self: Executable = undefined; + + const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic( + "failed to open directory '{s}': {t}", + .{ cache_dir_path, e }, + ); + log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e| + panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e}); + self.cache_f = cache_dir.makeOpenPath("f", .{}) catch 
|e|
+            panic("failed to open directory 'f': {t}", .{e});
+
+        // Linkers are expected to automatically add symbols prefixed with these for the start and
+        // end of sections whose names are valid C identifiers.
+        const ofmt = builtin.object_format;
+        const section_start_prefix, const section_end_prefix = switch (ofmt) {
+            .elf => .{ "__start_", "__stop_" },
+            .macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
+            else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
+        };
+
+        self.pc_counters = blk: {
+            const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
+            const pc_counters_start = @extern([*]u8, .{
+                .name = pc_counters_start_name,
+                .linkage = .weak,
+            }) orelse panic("missing {s} symbol", .{pc_counters_start_name});
+
+            const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
+            const pc_counters_end = @extern([*]u8, .{
+                .name = pc_counters_end_name,
+                .linkage = .weak,
+            }) orelse panic("missing {s} symbol", .{pc_counters_end_name});
+
+            break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
+        };
+
+        const pcs = blk: {
+            const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
+            const pcs_start = @extern([*]usize, .{
+                .name = pcs_start_name,
+                .linkage = .weak,
+            }) orelse panic("missing {s} symbol", .{pcs_start_name});
+
+            const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
+            const pcs_end = @extern([*]usize, .{
+                .name = pcs_end_name,
+                .linkage = .weak,
+            }) orelse panic("missing {s} symbol", .{pcs_end_name});
+
+            break :blk pcs_start[0 .. pcs_end - pcs_start];
+        };
+
+        if (self.pc_counters.len != pcs.len) panic(
+            "pc counters length and pcs length do not match ({} != {})",
+            .{ self.pc_counters.len, pcs.len },
+        );
+
+        self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs));
+        self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
+
+        return self;
+    }
+
+    pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
+        return .{ .pc_counters = self.pc_counters };
+    }
+
+    /// Iterates over pc_counters, returning a bitset of whether each of them has been hit
+    pub const PcBitsetIterator = struct {
+        index: usize = 0,
+        pc_counters: []u8,
+
+        pub fn next(self: *PcBitsetIterator) usize {
+            const rest = self.pc_counters[self.index..];
+            if (rest.len >= @bitSizeOf(usize)) {
+                defer self.index += @bitSizeOf(usize);
+                const V = @Vector(@bitSizeOf(usize), u8);
+                return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
+            } else if (rest.len != 0) {
+                defer self.index += rest.len;
+                var res: usize = 0;
+                for (0.., rest) |bit_index, byte| {
+                    res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
+                }
+                return res;
+            } else unreachable;
+        }
+    };
+};
+
+/// Data gathered from instrumentation functions.
+/// Separate from Executable since its state is resettable and changes.
+/// Separate from Fuzzer since it may be needed before fuzzing starts.
const Instrumentation = struct {
+    /// Bitset of seen pcs across all runs, excluding fresh pcs.
+    /// This is kept separate from shared_seen_pcs because multiple fuzzing processes are likely
+    /// using that file, which causes contention and sets pcs unrelated to our campaign.
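+    /// Layout matches `Executable.PcBitsetIterator` output: pc index `i` maps to bit
+    /// `i % @bitSizeOf(usize)` of element `i / @bitSizeOf(usize)`.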
seen_pcs: []usize,
+
+    /// Stores a fresh input's new pcs
+    fresh_pcs: []usize,
+
+    /// Pcs from which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpX
+    /// have been called and whose constant values have already been added to const_valsX
+    const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
+    /// Values that have been constant operands in comparisons and switch cases.
+    /// There may be duplicates in these arrays if they came from different addresses, which is
+    /// fine as they are likely more important and hence more likely to be selected.
+    const_vals2: std.ArrayListUnmanaged(u16) = .empty,
+    const_vals4: std.ArrayListUnmanaged(u32) = .empty,
+    const_vals8: std.ArrayListUnmanaged(u64) = .empty,
+    const_vals16: std.ArrayListUnmanaged(u128) = .empty,
+
+    /// A minimal state for this struct which instrumentation can function on.
+    /// Used before this structure is initialized to avoid illegal behavior
+    /// from instrumentation functions being called and using undefined values.
+    pub const preinit: Instrumentation = .{
+        .seen_pcs = undefined, // currently only updated by `Fuzzer`
+        .fresh_pcs = undefined,
+    };
+
+    pub fn depreinit(self: *Instrumentation) void {
+        self.const_vals2.deinit(gpa);
+        self.const_vals4.deinit(gpa);
+        self.const_vals8.deinit(gpa);
+        self.const_vals16.deinit(gpa);
+        self.* = undefined;
+    }
+
+    pub fn init() Instrumentation {
+        const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
+        const alloc_usizes = pc_bitset_usizes * 2;
+        const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
+        var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
+        const fba = fba_ctx.allocator();
+
+        var self: Instrumentation = .{
+            .seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
+            .fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
+        };
+        self.reset();
+        return self;
+    }
+
+    pub fn reset(self: *Instrumentation) void {
+        @memset(self.seen_pcs, 0);
+        @memset(self.fresh_pcs, 0);
+        self.const_pcs.clearRetainingCapacity();
+        self.const_vals2.clearRetainingCapacity();
+        self.const_vals4.clearRetainingCapacity();
+        self.const_vals8.clearRetainingCapacity();
+        self.const_vals16.clearRetainingCapacity();
+    }
+
+    /// If false is returned, then the pc is marked as seen
+    pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
+        return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
+    }
+
+    pub fn isFresh(self: *Instrumentation) bool {
+        var hit_pcs = exec.pcBitsetIterator();
+        for (self.seen_pcs) |seen_pcs| {
+            if (hit_pcs.next() & ~seen_pcs != 0) return true;
+        }
+
+        return false;
+    }
+
+    /// Updates `fresh_pcs`
+    pub fn setFresh(self: *Instrumentation) void {
+        var hit_pcs = exec.pcBitsetIterator();
+        for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
+            fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
+        }
+    }
+
+    /// Returns whether `exec.pc_counters` is a superset of `fresh_pcs`.
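+    /// That is, it checks that every pc recorded in `fresh_pcs` was hit again by the
+    /// current run; `minimizeInput` uses this to verify that a shortened input still
+    /// reaches all of the newly discovered pcs.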
pub fn atleastFresh(self: *Instrumentation) bool {
+        var hit_pcs = exec.pcBitsetIterator();
+        for (self.fresh_pcs) |fresh_pcs| {
+            if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
+        }
+        return true;
+    }
+
+    /// Updates based on `fresh_pcs`
+    fn updateSeen(self: *Instrumentation) void {
+        comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
+        const shared_seen_pcs: [*]volatile usize = @ptrCast(
+            exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
+        );
+
+        for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
+            seen.* |= fresh;
+            if (fresh != 0)
+                _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
+        }
+    }
+};
+
+const Fuzzer = struct {
+    arena_ctx: std.heap.ArenaAllocator = .init(gpa),
+    rng: std.Random.DefaultPrng = .init(0),
+    test_one: abi.TestOne,
+    /// The next input that will be given to the testOne function. When the
+    /// current process crashes, this memory-mapped file is used to recover the
+    /// input.
+    input: MemoryMappedList,
+
+    /// Minimized past inputs leading to new pc hits.
+    /// These are randomly mutated in round-robin fashion.
+    /// Element zero is always an empty input. It is guaranteed no other elements are empty.
+    corpus: std.ArrayListUnmanaged([]const u8),
+    corpus_pos: usize,
+    /// List of past mutations that have led to new inputs. This way, the mutations that are the
+    /// most effective are the most likely to be selected again. Starts with one of each mutation.
+    mutations: std.ArrayListUnmanaged(Mutation) = .empty,
+
+    /// Filesystem directory containing found inputs for future runs
+    corpus_dir: std.fs.Dir,
+    corpus_dir_idx: usize = 0,
+
+    pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
+        var self: Fuzzer = .{
+            .test_one = test_one,
+            .input = undefined,
+            .corpus = .empty,
+            .corpus_pos = 0,
+            .mutations = .empty,
+            .corpus_dir = undefined,
+        };
+        const arena = self.arena_ctx.allocator();
+
+        self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
+            panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
+        self.input = in: {
+            const f = self.corpus_dir.createFile("in", .{
+                .read = true,
+                .truncate = false,
+                // In case any other fuzz tests are running under the same test name,
+                // the input file is exclusively locked to ensure only one proceeds.
+                .lock = .exclusive,
+                .lock_nonblocking = true,
+            }) catch |e| switch (e) {
+                error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
+                else => panic("failed to create input file 'in': {t}", .{e}),
+            };
+            const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
+            const map = (if (size < std.heap.page_size_max)
+                MemoryMappedList.create(f, 8, std.heap.page_size_max)
+            else
+                MemoryMappedList.init(f, size, size)) catch |e|
+                panic("failed to memory map input file 'in': {t}", .{e});
+
+            // Perform a dry-run of the stored input if there was one in case it might reproduce a
+            // crash.
+            const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
+            if (size >= 8 and old_in_len != 0 and old_in_len <= map.items.len - 8) {
+                test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
+            }
+
+            break :in map;
+        };
+        inst.reset();
+
+        self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
+        // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
+        // new inputs.
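+        // Note that `addInput` also primes the shared input file, whose layout is:
+        //   bytes [0..8): little-endian usize length of the current input
+        //   bytes [8.. ): the input itself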
+ self.addInput(&.{}); + + while (true) { + var name_buf: [@sizeOf(usize) * 2]u8 = undefined; + const bytes = self.corpus_dir.readFileAlloc( + std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable, + arena, + .unlimited, + ) catch |e| switch (e) { + error.FileNotFound => break, + else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }), + }; + // No corpus file of length zero will ever be created + if (bytes.len == 0) + panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx}); + self.addInput(bytes); + self.corpus_dir_idx += 1; + } + + return self; + } + + pub fn deinit(self: *Fuzzer) void { + self.input.deinit(); + self.corpus.deinit(gpa); + self.mutations.deinit(gpa); + self.corpus_dir.close(); + self.arena_ctx.deinit(); + self.* = undefined; + } + + pub fn addInput(self: *Fuzzer, bytes: []const u8) void { + self.corpus.append(gpa, bytes) catch @panic("OOM"); + self.input.clearRetainingCapacity(); + self.input.ensureTotalCapacity(8 + bytes.len) catch |e| + panic("could not resize shared input file: {t}", .{e}); + self.input.items.len = 8; + self.input.appendSliceAssumeCapacity(bytes); + self.run(); + inst.setFresh(); + inst.updateSeen(); + } + + /// Assumes `fresh_pcs` correspond to the input + fn minimizeInput(self: *Fuzzer) void { + // The minimization technique is kept relatively simple, we sequentially try to remove each + // byte and check that the new pcs and memory loads are still hit. + var i = self.input.items.len; + while (i != 8) { + i -= 1; + const old = self.input.orderedRemove(i); + + @memset(exec.pc_counters, 0); + self.run(); + + if (!inst.atleastFresh()) { + self.input.insertAssumeCapacity(i, old); + } else { + // This removal may have led to new pcs or memory loads being hit, so we need to + // update them to avoid duplicates. + inst.setFresh(); + } + } + } + + fn run(self: *Fuzzer) void { + // `pc_counters` is not cleared since only new hits are relevant. + + mem.bytesAsValue(usize, self.input.items[0..8]).* = + mem.nativeToLittle(usize, self.input.items.len - 8); + self.test_one(.fromSlice(@volatileCast(self.input.items[8..]))); + + const header = mem.bytesAsValue( + abi.SeenPcsHeader, + exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)], + ); + _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic); + } + + pub fn cycle(self: *Fuzzer) void { + const input = self.corpus.items[self.corpus_pos]; + self.corpus_pos += 1; + if (self.corpus_pos == self.corpus.items.len) + self.corpus_pos = 0; + + const rng = self.rng.random(); + while (true) { + const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)]; + if (!m.mutate( + rng, + input, + &self.input, + self.corpus.items, + inst.const_vals2.items, + inst.const_vals4.items, + inst.const_vals8.items, + inst.const_vals16.items, + )) continue; + + self.run(); + if (inst.isFresh()) { + @branchHint(.unlikely); + + const header = mem.bytesAsValue( + abi.SeenPcsHeader, + exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)], + ); + _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic); + + inst.setFresh(); + self.minimizeInput(); + inst.updateSeen(); + + // An empty-input has always been tried, so if an empty input is fresh then the + // test has to be non-deterministic. This has to be checked as duplicate empty + // entries are not allowed. 
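+                // (The run was already counted as unique above; that counter is rolled
+                // back below rather than crediting the nondeterministic hit.)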
if (self.input.items.len - 8 == 0) {
+                    std.log.warn("non-deterministic test (empty input produces different hits)", .{});
+                    _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
+                    return;
+                }
+
+                const arena = self.arena_ctx.allocator();
+                const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
+
+                self.corpus.append(gpa, bytes) catch @panic("OOM");
+                self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
+
+                // Write new corpus to cache
+                var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
+                self.corpus_dir.writeFile(.{
+                    .sub_path = std.fmt.bufPrint(
+                        &name_buf,
+                        "{x}",
+                        .{self.corpus_dir_idx},
+                    ) catch unreachable,
+                    .data = bytes,
+                }) catch |e| panic(
+                    "failed to write corpus file '{x}': {t}",
+                    .{ self.corpus_dir_idx, e },
+                );
+                self.corpus_dir_idx += 1;
+            }
+
+            break;
+        }
+    }
+};
+
+/// Instrumentation must not be triggered before this function is called
+export fn fuzzer_init(cache_dir_path: abi.Slice) void {
+    inst.depreinit();
+    exec = .init(cache_dir_path.toSlice());
+    inst = .init();
+}
+
 /// Invalid until `fuzzer_init` is called.
 export fn fuzzer_coverage_id() u64 {
-    return fuzzer.coverage_id;
+    return exec.pc_digest;
 }
 
-var fuzzer_one: *const fn (input_ptr: [*]const u8, input_len: usize) callconv(.c) void = undefined;
-
-export fn fuzzer_start(testOne: @TypeOf(fuzzer_one)) void {
-    fuzzer_one = testOne;
-    fuzzer.start() catch |err| oom(err);
+/// fuzzer_init must be called beforehand
+export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
+    current_test_name = unit_test_name.toSlice();
+    fuzzer = .init(test_one, unit_test_name.toSlice());
 }
 
-export fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void {
-    fuzzer.unit_test_name = name_ptr[0..name_len];
+/// fuzzer_init_test must be called beforehand
+/// The caller owns the memory of bytes and must not free it until the fuzzer is finished.
+export fn fuzzer_new_input(bytes: abi.Slice) void {
+    // An entry of length zero is always added and duplicates of it are not allowed.
+    if (bytes.len != 0)
+        fuzzer.addInput(bytes.toSlice());
 }
 
-export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void {
-    // Linkers are expected to automatically add `__start_
` and - // `__stop_
` symbols when section names are valid C identifiers.
+/// fuzzer_init_test must be called first
+export fn fuzzer_main() void {
+    while (true) {
+        fuzzer.cycle();
+    }
+}
 
-    const ofmt = builtin.object_format;
+/// Helps determine run uniqueness in the face of recursion.
+/// Currently not used by the fuzzer.
+export threadlocal var __sancov_lowest_stack: usize = 0;
 
-    const start_symbol_prefix: []const u8 = if (ofmt == .macho)
-        "\x01section$start$__DATA$__"
-    else
-        "__start___";
-    const end_symbol_prefix: []const u8 = if (ofmt == .macho)
-        "\x01section$end$__DATA$__"
-    else
-        "__stop___";
+/// Inlined so that `@returnAddress` yields the address of the instrumented code
+/// rather than of the trace callback that calls this helper
+inline fn genericConstCmp(T: anytype, val: T, comptime const_vals_field: []const u8) void {
+    if (!inst.constPcSeen(@returnAddress())) {
+        @branchHint(.unlikely);
+        @field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
+    }
+}
 
-    const pc_counters_start_name = start_symbol_prefix ++ "sancov_cntrs";
-    const pc_counters_start = @extern([*]u8, .{
-        .name = pc_counters_start_name,
-        .linkage = .weak,
-    }) orelse fatal("missing {s} symbol", .{pc_counters_start_name});
+export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
+    _ = const_arg;
+    _ = arg;
+}
 
-    const pc_counters_end_name = end_symbol_prefix ++ "sancov_cntrs";
-    const pc_counters_end = @extern([*]u8, .{
-        .name = pc_counters_end_name,
-        .linkage = .weak,
-    }) orelse fatal("missing {s} symbol", .{pc_counters_end_name});
+export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
+    _ = arg;
+    genericConstCmp(u16, const_arg, "const_vals2");
+}
 
-    const pc_counters = pc_counters_start[0 .. pc_counters_end - pc_counters_start];
+export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
+    _ = arg;
+    genericConstCmp(u32, const_arg, "const_vals4");
+}
 
-    const pcs_start_name = start_symbol_prefix ++ "sancov_pcs1";
-    const pcs_start = @extern([*]usize, .{
-        .name = pcs_start_name,
-        .linkage = .weak,
-    }) orelse fatal("missing {s} symbol", .{pcs_start_name});
+export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
+    _ = arg;
+    genericConstCmp(u64, const_arg, "const_vals8");
+}
 
-    const pcs_end_name = end_symbol_prefix ++ "sancov_pcs1";
-    const pcs_end = @extern([*]usize, .{
-        .name = pcs_end_name,
-        .linkage = .weak,
-    }) orelse fatal("missing {s} symbol", .{pcs_end_name});
+export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
+    _ = val;
+    if (!inst.constPcSeen(@returnAddress())) {
+        @branchHint(.unlikely);
+        const case_bits = cases[1];
+        const cases_slice = cases[2..][0..cases[0]];
+        switch (case_bits) {
+            // 8-bit cases are ignored because they are likely to be randomly generated
+            0...8 => {},
+            9...16 => for (cases_slice) |c|
+                inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
+            17...32 => for (cases_slice) |c|
+                inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
+            33...64 => for (cases_slice) |c|
+                inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
+            else => {}, // Should be impossible
+        }
+    }
+}
 
-    const pcs = pcs_start[0 ..
pcs_end - pcs_start];
+export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
+    _ = arg1;
+    _ = arg2;
+}
 
-    const cache_dir_path = cache_dir_struct.toZig();
-    const cache_dir = if (cache_dir_path.len == 0)
-        std.fs.cwd()
-    else
-        std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| {
-            fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) });
+export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
+    _ = arg1;
+    _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
+    _ = arg1;
+    _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
+    _ = arg1;
+    _ = arg2;
+}
+
+export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
+    // Not valuable because we already have pc tracing via 8bit counters.
+    _ = callee;
+}
+export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
+    // clang will emit a call to this function when compiling with code coverage instrumentation.
+    // however, fuzzer_init() does not need this information since it directly reads from the
+    // symbol table.
+    _ = start;
+    _ = end;
+}
+export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
+    // clang will emit a call to this function when compiling with code coverage instrumentation.
+    // however, fuzzer_init() does not need this information since it directly reads from the
+    // symbol table.
+    _ = start;
+    _ = end;
+}
+
+/// Copy all of source into dest at position 0.
+/// If the slices overlap, dest.ptr must be <= source.ptr.
+fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
+    for (dest, source) |*d, s| d.* = s;
+}
+
+/// Copy all of source into dest at position 0.
+/// If the slices overlap, dest.ptr must be >= source.ptr.
+fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
+    var i = source.len;
+    while (i > 0) {
+        i -= 1;
+        dest[i] = source[i];
+    }
+}
+
+const Mutation = enum {
+    /// Applies to .insert_*_span and .push_*_span
+    /// For wtf-8, this limits code units, not code points
+    const max_insert_len = 12;
+    /// Applies to .insert_large_*_span and .push_large_*_span
+    /// 4096 is used as it is a common sector size
+    const max_large_insert_len = 4096;
+    /// Applies to .delete_span and .pop_span
+    const max_delete_len = 16;
+    /// Applies to .set_*_span, .move_span, .set_existing_span
+    const max_set_len = 12;
+    const max_replicate_len = 64;
+    const AddValue = i6;
+    const SmallValue = i10;
+
+    delete_byte,
+    delete_span,
+    /// Removes the last byte from the input
+    pop_byte,
+    pop_span,
+    /// Inserts a group of bytes which is already in the input and removes the original copy.
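+    /// (Illustrative: moving the span "cd" of "abcdef" to the front could yield "cdabef".)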
move_span,
+    /// Replaces a group of bytes in the input with another group of bytes in the input
+    set_existing_span,
+    insert_existing_span,
+    push_existing_span,
+    set_rng_byte,
+    set_rng_span,
+    insert_rng_byte,
+    insert_rng_span,
+    /// Adds a byte to the end of the input
+    push_rng_byte,
+    push_rng_span,
+    set_zero_byte,
+    set_zero_span,
+    insert_zero_byte,
+    insert_zero_span,
+    push_zero_byte,
+    push_zero_span,
+    /// Inserts a lot of zeros at the end of the input
+    /// This is intended to work with fuzz tests that require data in (large) blocks
+    push_large_zero_span,
+    /// Inserts a group of ascii printable characters
+    insert_print_span,
+    /// Inserts a group of characters from a...z, A...Z, 0...9, _, and ' '
+    insert_common_span,
+    /// Inserts a group of ascii digits possibly preceded by a `-`
+    insert_integer,
+    /// Code units are evenly distributed between one and four
+    insert_wtf8_char,
+    insert_wtf8_span,
+    /// Inserts a group of bytes from another input
+    insert_splice_span,
+    // utf16 is not yet included since insertion of random bytes should adequately check
+    // BMP characters, surrogate handling, and occasionally characters outside of the BMP.
+    set_print_span,
+    set_common_span,
+    set_splice_span,
+    /// Similar to set_splice_span, but the bytes are copied to the same index instead of a random one
+    replicate_splice_span,
+    push_print_span,
+    push_common_span,
+    push_integer,
+    push_wtf8_char,
+    push_wtf8_span,
+    push_splice_span,
+    /// Clears a random number of high bits of a byte
+    truncate_8,
+    truncate_16le,
+    truncate_16be,
+    truncate_32le,
+    truncate_32be,
+    truncate_64le,
+    truncate_64be,
+    /// Flips a random bit
+    xor_1,
+    /// Flips up to three bits of a byte, biased toward fewer bits
+    xor_few_8,
+    /// Flips up to six bits of a 16-bit value, biased toward fewer bits
+    xor_few_16,
+    /// Flips up to nine bits of a 32-bit value, biased toward fewer bits
+    xor_few_32,
+    /// Flips up to twelve bits of a 64-bit value, biased toward fewer bits
+    xor_few_64,
+    /// Adds a value of type AddValue to a byte
+    add_8,
+    add_16le,
+    add_16be,
+    add_32le,
+    add_32be,
+    add_64le,
+    add_64be,
+    /// Sets a 16-bit little-endian value to a value of type SmallValue
+    set_small_16le,
+    set_small_16be,
+    set_small_32le,
+    set_small_32be,
+    set_small_64le,
+    set_small_64be,
+    insert_small_16le,
+    insert_small_16be,
+    insert_small_32le,
+    insert_small_32be,
+    insert_small_64le,
+    insert_small_64be,
+    push_small_16le,
+    push_small_16be,
+    push_small_32le,
+    push_small_32be,
+    push_small_64le,
+    push_small_64be,
+    set_const_16,
+    set_const_32,
+    set_const_64,
+    set_const_128,
+    insert_const_16,
+    insert_const_32,
+    insert_const_64,
+    insert_const_128,
+    push_const_16,
+    push_const_32,
+    push_const_64,
+    push_const_128,
+    /// Sets a byte with up to three bits set, biased toward fewer bits
+    set_few_8,
+    /// Sets a 16-bit value with up to six bits set, biased toward fewer bits
+    set_few_16,
+    /// Sets a 32-bit value with up to nine bits set, biased toward fewer bits
+    set_few_32,
+    /// Sets a 64-bit value with up to twelve bits set, biased toward fewer bits
+    set_few_64,
+    insert_few_8,
+    insert_few_16,
+    insert_few_32,
+    insert_few_64,
+    push_few_8,
+    push_few_16,
+    push_few_32,
+    push_few_64,
+    /// Randomizes a random contiguous group of bits in a byte
+    packed_set_rng_8,
+    packed_set_rng_16le,
+    packed_set_rng_16be,
+    packed_set_rng_32le,
+    packed_set_rng_32be,
+    packed_set_rng_64le,
+    packed_set_rng_64be,
+
+    fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
+        var result: T = 0;
+        var remaining_bits = rng.intRangeAtMostBiased(u16, 1,
bits); + while (remaining_bits > 0) { + result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T))); + remaining_bits -= 1; + } + return result; + } + + /// Returns if the mutation was applicable to the input + pub fn mutate( + mutation: Mutation, + rng: std.Random, + in: []const u8, + out: *MemoryMappedList, + corpus: []const []const u8, + const_vals2: []const u16, + const_vals4: []const u32, + const_vals8: []const u64, + const_vals16: []const u128, + ) bool { + out.clearRetainingCapacity(); + const new_capacity = 8 + in.len + @max( + 16, // builtin 128 value + Mutation.max_insert_len, + Mutation.max_large_insert_len, + ); + out.ensureTotalCapacity(new_capacity) catch |e| + panic("could not resize shared input file: {t}", .{e}); + out.items.len = 8; // Length field + + const applied = switch (mutation) { + inline else => |m| m.comptimeMutate( + rng, + in, + out, + corpus, + const_vals2, + const_vals4, + const_vals8, + const_vals16, + ), + }; + if (!applied) + assert(out.items.len == 8) + else + assert(out.items.len <= new_capacity); + return applied; + } + + /// Assumes out has already been cleared + fn comptimeMutate( + comptime mutation: Mutation, + rng: std.Random, + in: []const u8, + out: *MemoryMappedList, + corpus: []const []const u8, + const_vals2: []const u16, + const_vals4: []const u32, + const_vals8: []const u64, + const_vals16: []const u128, + ) bool { + const Class = enum { new, remove, rmw, move_span, replicate_splice_span }; + const class: Class, const class_ctx = switch (mutation) { + // zig fmt: off + .move_span => .{ .move_span, null }, + .replicate_splice_span => .{ .replicate_splice_span, null }, + + .delete_byte => .{ .remove, .{ .delete, 1 } }, + .delete_span => .{ .remove, .{ .delete, max_delete_len } }, + + .pop_byte => .{ .remove, .{ .pop, 1 } }, + .pop_span => .{ .remove, .{ .pop, max_delete_len } }, + + .set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } }, + .set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } }, + .set_rng_span => .{ .new, .{ .set , 1, .rng , .many } }, + .set_zero_span => .{ .new, .{ .set , 1, .zero , .many } }, + .set_common_span => .{ .new, .{ .set , 1, .common , .many } }, + .set_print_span => .{ .new, .{ .set , 1, .print , .many } }, + .set_existing_span => .{ .new, .{ .set , 2, .existing, .many } }, + .set_splice_span => .{ .new, .{ .set , 1, .splice , .many } }, + .set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } }, + .set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } }, + .set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } }, + .set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } }, + .set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } }, + .set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } }, + .set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } }, + .set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } }, + .set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } }, + .set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } }, + .set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } }, + .set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } }, + .set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } }, + .set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } }, + + .insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } }, + .insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } }, + .insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } }, + .insert_zero_span => .{ .new, 
.{ .insert, 0, .zero , .many } }, + .insert_print_span => .{ .new, .{ .insert, 0, .print , .many } }, + .insert_common_span => .{ .new, .{ .insert, 0, .common , .many } }, + .insert_integer => .{ .new, .{ .insert, 0, .integer , .many } }, + .insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } }, + .insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } }, + .insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } }, + .insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } }, + .insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } }, + .insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } }, + .insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } }, + .insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } }, + .insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } }, + .insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } }, + .insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } }, + .insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } }, + .insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } }, + .insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } }, + .insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } }, + .insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } }, + .insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } }, + .insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } }, + + .push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } }, + .push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } }, + .push_rng_span => .{ .new, .{ .push , 0, .rng , .many } }, + .push_zero_span => .{ .new, .{ .push , 0, .zero , .many } }, + .push_print_span => .{ .new, .{ .push , 0, .print , .many } }, + .push_common_span => .{ .new, .{ .push , 0, .common , .many } }, + .push_integer => .{ .new, .{ .push , 0, .integer , .many } }, + .push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } }, + .push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } }, + .push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } }, + .push_existing_span => .{ .new, .{ .push , 1, .existing, .many } }, + .push_splice_span => .{ .new, .{ .push , 0, .splice , .many } }, + .push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } }, + .push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } }, + .push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } }, + .push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } }, + .push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } }, + .push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } }, + .push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } }, + .push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } }, + .push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } }, + .push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } }, + .push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } }, + .push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } }, + .push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } }, + .push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } }, + + .xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } }, + .xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } }, + .xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } }, + .xor_few_32 => .{ .rmw, .{ .xor , u32, 
native_endian, 9 } }, + .xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } }, + + .truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } }, + .truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } }, + .truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } }, + .truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } }, + .truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } }, + .truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } }, + .truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } }, + + .add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } }, + .add_16le => .{ .rmw, .{ .add , i16, .little , {} } }, + .add_32le => .{ .rmw, .{ .add , i32, .little , {} } }, + .add_64le => .{ .rmw, .{ .add , i64, .little , {} } }, + .add_16be => .{ .rmw, .{ .add , i16, .big , {} } }, + .add_32be => .{ .rmw, .{ .add , i32, .big , {} } }, + .add_64be => .{ .rmw, .{ .add , i64, .big , {} } }, + + .packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } }, + .packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } }, + .packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } }, + .packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } }, + .packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } }, + .packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } }, + .packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } }, + // zig fmt: on }; - fuzzer.init(cache_dir, pc_counters, pcs) catch |err| - fatal("unable to init fuzzer: {s}", .{@errorName(err)}); -} + switch (class) { + .new => { + const op: enum { + set, + insert, + push, -export fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void { - fuzzer.addCorpusElem(input_ptr[0..input_len]) catch |err| - fatal("failed to add corpus element: {s}", .{@errorName(err)}); -} + pub fn maxLen(comptime op: @This(), in_len: usize) usize { + return switch (op) { + .set => @min(in_len, max_set_len), + .insert, .push => max_insert_len, + }; + } + }, const min_in_len, const data: enum { + rng, + zero, + common, + print, + integer, + wtf8, + existing, + splice, + @"const", + small, + few, + }, const data_ctx = class_ctx; + const Size = enum { one, many, large }; + if (in.len < min_in_len) return false; + if (data == .@"const" and data_ctx.len == 0) return false; + + const splice_i = if (data == .splice) blk: { + // Element zero always holds an empty input, so we do not select it + if (corpus.len == 1) return false; + break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len); + } else undefined; + + // Only needs to be followed for set + const len = switch (data) { + else => switch (@as(Size, data_ctx)) { + .one => 1, + .many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)), + .large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len), + }, + .wtf8 => undefined, // varies by size of each code unit + .splice => rng.intRangeAtMostBiased(usize, 1, @min( + corpus[splice_i].len, + op.maxLen(in.len), + )), + .existing => rng.intRangeAtMostBiased(usize, 1, @min( + in.len, + op.maxLen(in.len), + )), + .@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child), + .small, .few => @sizeOf(data_ctx[0]), + }; + + const i = switch (op) { + .set => rng.uintAtMostBiased(usize, in.len - len), + .insert => rng.uintAtMostBiased(usize, in.len), + .push => in.len, + }; + + out.appendSliceAssumeCapacity(in[0..i]); + switch (data) { + .rng => { + var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined; + 
rng.bytes(bytes[0..len]);
+ out.appendSliceAssumeCapacity(bytes[0..len]);
+ },
+ .zero => out.appendNTimesAssumeCapacity(0, len),
+ .common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
+ c.* = switch (rng.int(u6)) {
+ 0 => ' ',
+ 1...10 => |x| '0' + (@as(u8, x) - 1),
+ 11...36 => |x| 'A' + (@as(u8, x) - 11),
+ 37 => '_',
+ 38...63 => |x| 'a' + (@as(u8, x) - 38),
+ };
+ },
+ .print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
+ c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
+ },
+ .integer => {
+ const negative = len != 0 and rng.boolean();
+ if (negative) {
+ out.appendAssumeCapacity('-');
+ }
+
+ for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
+ c.* = rng.intRangeAtMostBiased(u8, '0', '9');
+ }
+ },
+ .wtf8 => {
+ comptime assert(op != .set);
+ var codepoints: usize = if (data_ctx == .one)
+ 1
+ else
+ rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
+
+ while (true) {
+ const units1 = rng.int(u2);
+ const value = switch (units1) {
+ 0 => rng.int(u7),
+ 1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
+ 2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
+ 3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
+ };
+ const units = @as(u3, units1) + 1;
+
+ var buf: [4]u8 = undefined;
+ assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
+ out.appendSliceAssumeCapacity(buf[0..units]);
+
+ codepoints -= 1;
+ if (codepoints == 0) break;
+ }
+ },
+ .existing => {
+ const j = rng.uintAtMostBiased(usize, in.len - len);
+ out.appendSliceAssumeCapacity(in[j..][0..len]);
+ },
+ .splice => {
+ const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
+ out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
+ },
+ .@"const" => out.appendSliceAssumeCapacity(mem.asBytes(
+ &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
+ )),
+ .small => out.appendSliceAssumeCapacity(mem.asBytes(
+ &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
+ )),
+ .few => out.appendSliceAssumeCapacity(mem.asBytes(
+ &fewValue(rng, data_ctx[0], data_ctx[1]),
+ )),
+ }
+ switch (op) {
+ .set => out.appendSliceAssumeCapacity(in[i + len ..]),
+ .insert => out.appendSliceAssumeCapacity(in[i..]),
+ .push => {},
+ }
+ },
+ .remove => {
+ if (in.len == 0) return false;
+ const Op = enum { delete, pop };
+ const op: Op, const max_len = class_ctx;
+ // LessThan is used so we never delete the entire input (which would be unproductive,
+ // since the empty input has already been tried)
+ const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
+ usize,
+ @min(max_len + 1, in.len),
+ );
+ switch (op) {
+ .delete => {
+ const i = rng.uintAtMostBiased(usize, in.len - len);
+ out.appendSliceAssumeCapacity(in[0..i]);
+ out.appendSliceAssumeCapacity(in[i + len ..]);
+ },
+ .pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
+ }
+ },
+ .rmw => {
+ const Op = enum { xor, truncate, add, packed_rng };
+ const op: Op, const T, const endian, const xor_bits = class_ctx;
+ if (in.len < @sizeOf(T)) return false;
+ const Log2T = math.Log2Int(T);
+
+ const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
+ const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
+ const new = switch (op) {
+ .xor => old ^ fewValue(rng, T, xor_bits),
+ .truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
+ .add => old +% addend: {
+ const val = rng.int(Mutation.AddValue);
+ break :addend if (val == 0) 1 else val;
+ },
+ .packed_rng => blk: {
+ const bits = rng.int(math.Log2Int(T)) +| 1;
+ break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
+ },
+ };
+ out.appendSliceAssumeCapacity(in);
+ mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
+ mem.nativeTo(T, new, endian);
+ },
+ .move_span => {
+ if (in.len < 2) return false;
+ // One less, since moving the whole input never changes anything
+ const len = rng.intRangeAtMostBiased(usize, 1, @min(
+ in.len - 1,
+ Mutation.max_set_len,
+ ));
+
+ const src = rng.uintAtMostBiased(usize, in.len - len);
+ // This indexes into the final input
+ const dst = blk: {
+ const res = rng.uintAtMostBiased(usize, in.len - len - 1);
+ break :blk res + @intFromBool(res >= src);
+ };
+
+ if (src < dst) {
+ out.appendSliceAssumeCapacity(in[0..src]);
+ out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
+ out.appendSliceAssumeCapacity(in[src..][0..len]);
+ out.appendSliceAssumeCapacity(in[dst + len ..]);
+ } else {
+ out.appendSliceAssumeCapacity(in[0..dst]);
+ out.appendSliceAssumeCapacity(in[src..][0..len]);
+ out.appendSliceAssumeCapacity(in[dst..src]);
+ out.appendSliceAssumeCapacity(in[src + len ..]);
+ }
+ },
+ .replicate_splice_span => {
+ if (in.len == 0) return false;
+ if (corpus.len == 1) return false;
+ const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
+ const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
+ const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
+ out.appendSliceAssumeCapacity(in[0..i]);
+ out.appendSliceAssumeCapacity(from[i..][0..len]);
+ out.appendSliceAssumeCapacity(in[i + len ..]);
+ },
+ }
+ return true;
+ }
+};

/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
pub const MemoryMappedList = struct {
@@ -654,8 +1397,23 @@ pub const MemoryMappedList = struct {
fn growCapacity(current: usize, minimum: usize) usize {
var new = current;
while (true) {
- new = std.mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
+ new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
if (new >= minimum) return new;
}
}
+
+ pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
+ assert(l.items.len + 1 <= l.capacity);
+ l.items.len += 1;
+ volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
+ l.items[i] = item;
+ }
+
+ pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
+ assert(i < l.items.len);
+ const old = l.items[i];
+ volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
+ l.items.len -= 1;
+ return old;
+ }
};
diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig
index bf7e9580bc..6bea7654b2 100644
--- a/lib/std/Build/Fuzz.zig
+++ b/lib/std/Build/Fuzz.zig
@@ -252,9 +252,8 @@ pub fn sendUpdate(
const seen_pcs = cov_header.seenBits();
const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic);
const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic);
- if (prev.unique_runs != unique_runs) {
- // There has been an update.
- if (prev.unique_runs == 0) {
+ {
+ if (unique_runs != 0 and prev.unique_runs == 0) {
// We need to send initial context.
const header: abi.SourceIndexHeader = .{
.directories_len = @intCast(coverage_map.coverage.directories.entries.len),
diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig
index d5b02d951a..2398a8ed8d 100644
--- a/lib/std/Build/abi.zig
+++ b/lib/std/Build/abi.zig
@@ -138,6 +138,26 @@ pub const Rebuild = extern struct {
/// ABI bits specifically relating to the fuzzer interface.
pub const fuzz = struct {
+ pub const TestOne = *const fn (Slice) callconv(.c) void;
+ pub extern fn fuzzer_init(cache_dir_path: Slice) void;
+ pub extern fn fuzzer_coverage_id() u64;
+ pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
+ pub extern fn fuzzer_new_input(bytes: Slice) void;
+ pub extern fn fuzzer_main() void;
+
+ pub const Slice = extern struct {
+ ptr: [*]const u8,
+ len: usize,
+
+ pub fn toSlice(s: Slice) []const u8 {
+ return s.ptr[0..s.len];
+ }
+
+ pub fn fromSlice(s: []const u8) Slice {
+ return .{ .ptr = s.ptr, .len = s.len };
+ }
+ };
+
/// libfuzzer uses this and its usize is the one that counts. To match the ABI,
/// make the ints be the size of the target used with libfuzzer.
///
diff --git a/lib/std/json/scanner_test.zig b/lib/std/json/scanner_test.zig
index eb5d5cb75e..ae954a88f0 100644
--- a/lib/std/json/scanner_test.zig
+++ b/lib/std/json/scanner_test.zig
@@ -517,3 +517,20 @@ test isNumberFormattedLikeAnInteger {
try std.testing.expect(!isNumberFormattedLikeAnInteger("1e10"));
try std.testing.expect(!isNumberFormattedLikeAnInteger("1E10"));
}
+
+test "fuzz" {
+ try std.testing.fuzz({}, fuzzTestOne, .{});
+}
+
+fn fuzzTestOne(_: void, input: []const u8) !void {
+ var buf: [16384]u8 = undefined;
+ var fba: std.heap.FixedBufferAllocator = .init(&buf);
+
+ var scanner = Scanner.initCompleteInput(fba.allocator(), input);
+ // Property: There are at most input.len tokens
+ var tokens: usize = 0;
+ while ((scanner.next() catch return) != .end_of_document) {
+ tokens += 1;
+ if (tokens > input.len) return error.Overflow;
+ }
+}
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index fce5cbd209..d08f5b60ba 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -6451,3 +6451,19 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
try std.testing.expectEqual(expected, tree.errors[i].tag);
}
}
+
+test "fuzz ast parse" {
+ try std.testing.fuzz({}, fuzzTestOneParse, .{});
+}
+
+fn fuzzTestOneParse(_: void, input: []const u8) !void {
+ // The first byte selects between .zig and .zon mode.
+ if (input.len == 0) return;
+ const mode: std.zig.Ast.Mode = if (input[0] & 1 == 0) .zig else .zon;
+ const bytes = input[1..];
+
+ var fba: std.heap.FixedBufferAllocator = .init(&fixed_buffer_mem);
+ const allocator = fba.allocator();
+ const source = allocator.dupeZ(u8, bytes) catch return;
+ _ = std.zig.Ast.parse(allocator, source, mode) catch return;
+}
diff --git
a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index de04ba41fe..2736b8be54 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -1721,10 +1721,14 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v try std.testing.expectEqual(source.len, last_token.loc.end); } -fn testPropertiesUpheld(context: void, source: []const u8) anyerror!void { - _ = context; - const source0 = try std.testing.allocator.dupeZ(u8, source); - defer std.testing.allocator.free(source0); +fn testPropertiesUpheld(_: void, source: []const u8) !void { + var source0_buf: [512]u8 = undefined; + if (source.len + 1 > source0_buf.len) + return; + @memcpy(source0_buf[0..source.len], source); + source0_buf[source.len] = 0; + const source0 = source0_buf[0..source.len :0]; + var tokenizer = Tokenizer.init(source0); var tokenization_failed = false; while (true) { @@ -1750,18 +1754,15 @@ fn testPropertiesUpheld(context: void, source: []const u8) anyerror!void { } } - if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| { + if (tokenization_failed) return; + for (source0) |cur| { // Property: No null byte allowed except at end. if (cur == 0) { - try std.testing.expect(tokenization_failed); + return error.TestUnexpectedResult; } - // Property: No ASCII control characters other than \n and \t are allowed. - if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') { - try std.testing.expect(tokenization_failed); + // Property: No ASCII control characters other than \n, \t, and \r are allowed. + if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t' and cur != '\r') { + return error.TestUnexpectedResult; } - // Property: All '\r' must be followed by '\n'. - if (cur == '\r' and next != '\n') { - try std.testing.expect(tokenization_failed); - } - }; + } } diff --git a/test/standalone/libfuzzer/build.zig b/test/standalone/libfuzzer/build.zig index eecddcd852..54cc977abe 100644 --- a/test/standalone/libfuzzer/build.zig +++ b/test/standalone/libfuzzer/build.zig @@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, .fuzz = true, }), + .use_llvm = true, // #23423 }); b.installArtifact(exe); diff --git a/test/standalone/libfuzzer/main.zig b/test/standalone/libfuzzer/main.zig index 21d785866d..ae7b9941d5 100644 --- a/test/standalone/libfuzzer/main.zig +++ b/test/standalone/libfuzzer/main.zig @@ -1,29 +1,43 @@ const std = @import("std"); +const abi = std.Build.abi.fuzz; +const native_endian = @import("builtin").cpu.arch.endian(); -const FuzzerSlice = extern struct { - ptr: [*]const u8, - len: usize, - - fn fromSlice(s: []const u8) FuzzerSlice { - return .{ .ptr = s.ptr, .len = s.len }; - } -}; - -extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void; -extern fn fuzzer_init(cache_dir: FuzzerSlice) void; -extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void; -extern fn fuzzer_coverage_id() u64; +fn testOne(in: abi.Slice) callconv(.c) void { + std.debug.assertReadable(in.toSlice()); +} pub fn main() !void { - var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer _ = gpa.deinit(); - const args = try std.process.argsAlloc(gpa.allocator()); - defer std.process.argsFree(gpa.allocator(), args); + var debug_gpa_ctx: std.heap.DebugAllocator(.{}) = .init; + defer _ = debug_gpa_ctx.deinit(); + const gpa = debug_gpa_ctx.allocator(); - const cache_dir = args[1]; + var args = try std.process.argsWithAllocator(gpa); + defer args.deinit(); + _ = args.skip(); // executable name - 
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
- fuzzer_init_corpus_elem("hello".ptr, "hello".len);
- fuzzer_set_name("test".ptr, "test".len);
- _ = fuzzer_coverage_id();
+ const cache_dir_path = args.next() orelse @panic("expected cache directory path argument");
+ var cache_dir = try std.fs.cwd().openDir(cache_dir_path, .{});
+ defer cache_dir.close();
+
+ abi.fuzzer_init(.fromSlice(cache_dir_path));
+ abi.fuzzer_init_test(testOne, .fromSlice("test"));
+ abi.fuzzer_new_input(.fromSlice(""));
+ abi.fuzzer_new_input(.fromSlice("hello"));
+
+ const pc_digest = abi.fuzzer_coverage_id();
+ const coverage_file_path = "v/" ++ std.fmt.hex(pc_digest);
+ const coverage_file = try cache_dir.openFile(coverage_file_path, .{});
+ defer coverage_file.close();
+
+ var read_buf: [@sizeOf(abi.SeenPcsHeader)]u8 = undefined;
+ var r = coverage_file.reader(&read_buf);
+ // Propagate EndOfStream as-is; r.err is only set on an underlying read failure.
+ const pcs_header = r.interface.takeStruct(abi.SeenPcsHeader, native_endian) catch |err| return r.err orelse err;
+
+ if (pcs_header.pcs_len == 0)
+ return error.ZeroPcs;
+ const expected_len = @sizeOf(abi.SeenPcsHeader) +
+ try std.math.divCeil(usize, pcs_header.pcs_len, @bitSizeOf(usize)) * @sizeOf(usize) +
+ pcs_header.pcs_len * @sizeOf(usize);
+ if (try coverage_file.getEndPos() != expected_len)
+ return error.WrongEnd;
}
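
Note: the `.rmw` ("read-modify-write") mutation class in lib/fuzzer.zig above is easiest to follow on a concrete value. The following is a minimal standalone sketch, not part of the patch, of what the `truncate_16le` operation does to a buffer; the buffer contents, the fixed seed, and the unbiased `std.Random` helpers are illustrative stand-ins.

const std = @import("std");

pub fn main() void {
    var prng = std.Random.DefaultPrng.init(0);
    const rng = prng.random();

    var input = [_]u8{ 0xff, 0xff, 0x12, 0x34 };
    // Pick a random byte offset that still fits a u16.
    const idx = rng.uintAtMost(usize, input.len - @sizeOf(u16));
    const old = std.mem.readInt(u16, input[idx..][0..2], .little);
    // Shifting maxInt(u16) right by 0..15 keeps between 16 and 1 of the low bits.
    const new = old & (@as(u16, std.math.maxInt(u16)) >> rng.int(u4));
    std.mem.writeInt(u16, input[idx..][0..2], new, .little);
    std.debug.print("0x{x:0>4} truncated to 0x{x:0>4} at offset {d}\n", .{ old, new, idx });
}

The real implementation differs in that it uses the biased helpers (`uintAtMostBiased`, `rng.int(Log2T)`) shown in the `.rmw` branch and appends the mutated copy to the memory-mapped output list instead of mutating in place.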