//! zig/lib/fuzzer.zig
const builtin = @import("builtin");
const std = @import("std");
const fatal = std.process.fatal;
const mem = std.mem;
const math = std.math;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
const panic = std.debug.panic;
const abi = std.Build.abi.fuzz;
const native_endian = builtin.cpu.arch.endian();
pub const std_options = std.Options{
.logFn = logOverride,
};
fn logOverride(
comptime level: std.log.Level,
comptime scope: @Type(.enum_literal),
comptime format: []const u8,
args: anytype,
) void {
const f = log_f orelse
panic("attempt to use log before initialization, message:\n" ++ format, args);
f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
defer f.unlock();
var buf: [256]u8 = undefined;
var fw = f.writer(&buf);
const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e});
fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e});
const prefix1 = comptime level.asText();
const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
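// The resulting log line looks like "[<test name>] <level><scope>: <message>",
// e.g. "[setup] info: ..." before a test starts, with the default scope.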
fw.interface.print(
"[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n",
.{current_test_name orelse "setup"} ++ args,
) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
}
var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
const gpa = switch (builtin.mode) {
.Debug => debug_allocator.allocator(),
.ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
};
/// Logically part of `exec`, but kept separate so it can be set before `exec` is.
var log_f: ?std.fs.File = null;
var exec: Executable = .preinit;
var inst: Instrumentation = .preinit;
var fuzzer: Fuzzer = undefined;
var current_test_name: ?[]const u8 = null;
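/// Returns the number of usizes required for a bitset of `elems` bits.
/// For example, with a 64-bit usize, bitsetUsizes(64) == 1 and bitsetUsizes(65) == 2.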
fn bitsetUsizes(elems: usize) usize {
return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
}
const Executable = struct {
/// Tracks the hit count for each pc as updated by the process's instrumentation.
pc_counters: []u8,
cache_f: std.fs.Dir,
/// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be
/// viewed while the fuzzer is running.
shared_seen_pcs: MemoryMappedList,
/// Hash of pcs used to uniquely identify the shared coverage file
pc_digest: u64,
/// A minimal state for this struct on which instrumentation can function.
/// Used before this structure is initialized to avoid illegal behavior
/// from instrumentation functions being called and using undefined values.
pub const preinit: Executable = .{
.pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
.cache_f = undefined,
.shared_seen_pcs = undefined,
.pc_digest = undefined,
};
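/// The shared coverage file mirrors `abi.SeenPcsHeader.trailing`: a SeenPcsHeader,
/// then a seen-pc bitset of bitsetUsizes(pcs.len) usizes, then the pcs.len unslid
/// pc addresses, one usize each.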
fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
const pc_bitset_usizes = bitsetUsizes(pcs.len);
const coverage_file_name = std.fmt.hex(pc_digest);
comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
var v = cache_dir.makeOpenPath("v", .{}) catch |e|
panic("failed to create directory 'v': {t}", .{e});
defer v.close();
const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
.read = true,
// If we create the file, we want to block other processes while we populate it
.lock = .exclusive,
.exclusive = true,
})) |f|
.{ f, true }
else |e| switch (e) {
error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
.mode = .read_write,
.lock = .shared,
}) catch |e2| panic(
"failed to open existing coverage file '{s}': {t}",
.{ &coverage_file_name, e2 },
), false },
else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
};
const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
pc_bitset_usizes * @sizeOf(usize) +
pcs.len * @sizeOf(usize);
if (populate) {
defer coverage_file.lock(.shared) catch |e| panic(
"failed to demote lock for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
"failed to init memory map for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
.n_runs = 0,
.unique_runs = 0,
.pcs_len = pcs.len,
}));
map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
// Relocations have been applied to `pcs` so it contains runtime addresses (with slide
// applied). We need to translate these to the virtual addresses as on disk.
for (pcs) |pc| {
const pc_vaddr = fuzzer_unslide_address(pc);
map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
}
return map;
} else {
const size = coverage_file.getEndPos() catch |e| panic(
"failed to stat coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
if (size != coverage_file_len) panic(
"incompatible existing coverage file '{s}' (differing lengths: {} != {})",
.{ &coverage_file_name, size, coverage_file_len },
);
const map = MemoryMappedList.init(
coverage_file,
coverage_file_len,
coverage_file_len,
) catch |e| panic(
"failed to init memory map for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items));
if (seen_pcs_header.pcs_len != pcs.len) panic(
"incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
.{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
);
if (mem.indexOfDiff(usize, seen_pcs_header.pcAddrs(), pcs)) |i| panic(
"incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
.{ &coverage_file_name, i, seen_pcs_header.pcAddrs()[i], pcs[i] },
);
return map;
}
}
pub fn init(cache_dir_path: []const u8) Executable {
var self: Executable = undefined;
const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
"failed to open directory '{s}': {t}",
.{ cache_dir_path, e },
);
log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
panic("failed to open directory 'f': {t}", .{e});
// Linkers are expected to automatically add symbols prefixed with these for the start and
// end of sections whose names are valid C identifiers.
const ofmt = builtin.object_format;
const section_start_prefix, const section_end_prefix = switch (ofmt) {
.elf => .{ "__start_", "__stop_" },
.macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
};
self.pc_counters = blk: {
const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
const pc_counters_start = @extern([*]u8, .{
.name = pc_counters_start_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pc_counters_start_name});
const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
const pc_counters_end = @extern([*]u8, .{
.name = pc_counters_end_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pc_counters_end_name});
break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
};
const pcs = blk: {
const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
const pcs_start = @extern([*]usize, .{
.name = pcs_start_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pcs_start_name});
const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
const pcs_end = @extern([*]usize, .{
.name = pcs_end_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pcs_end_name});
break :blk pcs_start[0 .. pcs_end - pcs_start];
};
if (self.pc_counters.len != pcs.len) panic(
"pc counters length and pcs length do not match ({} != {})",
.{ self.pc_counters.len, pcs.len },
);
self.pc_digest = digest: {
// Relocations have been applied to `pcs` so it contains runtime addresses (with slide
// applied). We need to translate these to the virtual addresses as on disk.
var h: std.hash.Wyhash = .init(0);
for (pcs) |pc| {
const pc_vaddr = fuzzer_unslide_address(pc);
h.update(@ptrCast(&pc_vaddr));
}
break :digest h.final();
};
self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
return self;
}
pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
return .{ .pc_counters = self.pc_counters };
}
/// Iterates over pc_counters, yielding a bitset of which counters have been hit.
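/// Each call to `next` condenses the next @bitSizeOf(usize) counters into a single
/// usize, with bit i set when counter i is nonzero (i.e. that pc was hit at least once).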
pub const PcBitsetIterator = struct {
index: usize = 0,
pc_counters: []u8,
pub fn next(self: *PcBitsetIterator) usize {
const rest = self.pc_counters[self.index..];
if (rest.len >= @bitSizeOf(usize)) {
defer self.index += @bitSizeOf(usize);
const V = @Vector(@bitSizeOf(usize), u8);
return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
} else if (rest.len != 0) {
defer self.index += rest.len;
var res: usize = 0;
for (0.., rest) |bit_index, byte| {
res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
}
return res;
} else unreachable;
}
};
};
/// Data gathered from instrumentation functions.
/// Separate from `Executable` since its state is resettable and changes.
/// Separate from `Fuzzer` since it may be needed before fuzzing starts.
const Instrumentation = struct {
/// Bitset of seen pcs across all runs excluding fresh pcs.
/// This is separate from `shared_seen_pcs` because that one is likely being used by
/// multiple fuzzing processes, which causes contention and sets pcs unrelated to our campaign.
seen_pcs: []usize,
/// Stores a fresh input's new pcs
fresh_pcs: []usize,
/// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx
/// have been called from and whose values have already been added to the const_vals* lists
const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
/// Values that have been constant operands in comparisons and switch cases.
/// There may be duplicates in this array if they came from different addresses, which is
/// fine as they are likely more important and hence more likely to be selected.
const_vals2: std.ArrayListUnmanaged(u16) = .empty,
const_vals4: std.ArrayListUnmanaged(u32) = .empty,
const_vals8: std.ArrayListUnmanaged(u64) = .empty,
const_vals16: std.ArrayListUnmanaged(u128) = .empty,
/// A minimal state for this struct on which instrumentation can function.
/// Used before this structure is initialized to avoid illegal behavior
/// from instrumentation functions being called and using undefined values.
pub const preinit: Instrumentation = .{
.seen_pcs = undefined, // currently only updated by `Fuzzer`
.fresh_pcs = undefined,
};
pub fn depreinit(self: *Instrumentation) void {
self.const_vals2.deinit(gpa);
self.const_vals4.deinit(gpa);
self.const_vals8.deinit(gpa);
self.const_vals16.deinit(gpa);
self.* = undefined;
}
pub fn init() Instrumentation {
const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
const alloc_usizes = pc_bitset_usizes * 2;
const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
const fba = fba_ctx.allocator();
var self: Instrumentation = .{
.seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
.fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
};
self.reset();
return self;
}
pub fn reset(self: *Instrumentation) void {
@memset(self.seen_pcs, 0);
@memset(self.fresh_pcs, 0);
self.const_pcs.clearRetainingCapacity();
self.const_vals2.clearRetainingCapacity();
self.const_vals4.clearRetainingCapacity();
self.const_vals8.clearRetainingCapacity();
self.const_vals16.clearRetainingCapacity();
}
/// Returns whether the pc was already seen. If not, the pc is marked as seen.
pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
}
pub fn isFresh(self: *Instrumentation) bool {
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs) |seen_pcs| {
if (hit_pcs.next() & ~seen_pcs != 0) return true;
}
return false;
}
/// Updates `fresh_pcs`
pub fn setFresh(self: *Instrumentation) void {
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
}
}
/// Returns whether the pcs currently hit in `exec.pc_counters` are a superset of `fresh_pcs`.
pub fn atleastFresh(self: *Instrumentation) bool {
var hit_pcs = exec.pcBitsetIterator();
for (self.fresh_pcs) |fresh_pcs| {
if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
}
return true;
}
/// Updates `seen_pcs` and the shared coverage file based on `fresh_pcs`
fn updateSeen(self: *Instrumentation) void {
comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
const shared_seen_pcs: [*]volatile usize = @ptrCast(
exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
);
for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
seen.* |= fresh;
if (fresh != 0)
_ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
}
}
};
const Fuzzer = struct {
arena_ctx: std.heap.ArenaAllocator = .init(gpa),
rng: std.Random.DefaultPrng = .init(0),
test_one: abi.TestOne,
/// The next input that will be given to the testOne function. When the
/// current process crashes, this memory-mapped file is used to recover the
/// input.
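/// The first 8 bytes hold the input's length as a little-endian usize; the input
/// bytes themselves follow.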
input: MemoryMappedList,
/// Minimized past inputs leading to new pc hits.
/// These are randomly mutated in round-robin fashion.
/// Element zero is always an empty input. It is guaranteed that no other elements are empty.
corpus: std.ArrayListUnmanaged([]const u8),
corpus_pos: usize,
/// List of past mutations that have led to new inputs. This way, the mutations that are the
/// most effective are the most likely to be selected again. Starts with one of each mutation.
mutations: std.ArrayListUnmanaged(Mutation) = .empty,
/// Filesystem directory containing found inputs for future runs
corpus_dir: std.fs.Dir,
corpus_dir_idx: usize = 0,
pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
var self: Fuzzer = .{
.test_one = test_one,
.input = undefined,
.corpus = .empty,
.corpus_pos = 0,
.mutations = .empty,
.corpus_dir = undefined,
};
const arena = self.arena_ctx.allocator();
self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
self.input = in: {
const f = self.corpus_dir.createFile("in", .{
.read = true,
.truncate = false,
// In case any other fuzz tests are running under the same test name,
// the input file is exclusively locked to ensure only one proceeds.
.lock = .exclusive,
.lock_nonblocking = true,
}) catch |e| switch (e) {
error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
else => panic("failed to create input file 'in': {t}", .{e}),
};
const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
const map = (if (size < std.heap.page_size_max)
MemoryMappedList.create(f, 8, std.heap.page_size_max)
else
MemoryMappedList.init(f, size, size)) catch |e|
panic("failed to memory map input file 'in': {t}", .{e});
// Perform a dry-run of the stored input if there was one in case it might reproduce a
// crash.
const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
if (size >= 8 and old_in_len != 0 and old_in_len <= map.items.len - 8) {
test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
}
break :in map;
};
inst.reset();
self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
// Ensure there is never an empty corpus. Additionally, an empty input usually leads to
// new inputs.
self.addInput(&.{});
while (true) {
var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
const bytes = self.corpus_dir.readFileAlloc(
std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
arena,
.unlimited,
) catch |e| switch (e) {
error.FileNotFound => break,
else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
};
// No corpus file of length zero will ever be created
if (bytes.len == 0)
panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
self.addInput(bytes);
self.corpus_dir_idx += 1;
}
return self;
}
pub fn deinit(self: *Fuzzer) void {
self.input.deinit();
self.corpus.deinit(gpa);
self.mutations.deinit(gpa);
self.corpus_dir.close();
self.arena_ctx.deinit();
self.* = undefined;
}
pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
self.corpus.append(gpa, bytes) catch @panic("OOM");
self.input.clearRetainingCapacity();
self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
panic("could not resize shared input file: {t}", .{e});
self.input.items.len = 8;
self.input.appendSliceAssumeCapacity(bytes);
self.run();
inst.setFresh();
inst.updateSeen();
}
/// Assumes `fresh_pcs` correspond to the input
fn minimizeInput(self: *Fuzzer) void {
// The minimization technique is kept relatively simple: we sequentially try to remove each
// byte and check that the new pcs and memory loads are still hit.
var i = self.input.items.len;
while (i != 8) {
i -= 1;
const old = self.input.orderedRemove(i);
@memset(exec.pc_counters, 0);
self.run();
if (!inst.atleastFresh()) {
self.input.insertAssumeCapacity(i, old);
} else {
// This removal may have led to new pcs or memory loads being hit, so we need to
// update them to avoid duplicates.
inst.setFresh();
}
}
}
fn run(self: *Fuzzer) void {
// `pc_counters` is not cleared since only new hits are relevant.
mem.bytesAsValue(usize, self.input.items[0..8]).* =
mem.nativeToLittle(usize, self.input.items.len - 8);
self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
const header = mem.bytesAsValue(
abi.SeenPcsHeader,
exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
);
_ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
}
pub fn cycle(self: *Fuzzer) void {
const input = self.corpus.items[self.corpus_pos];
self.corpus_pos += 1;
if (self.corpus_pos == self.corpus.items.len)
self.corpus_pos = 0;
const rng = self.rng.random();
const m = while (true) {
const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
if (!m.mutate(
rng,
input,
&self.input,
self.corpus.items,
inst.const_vals2.items,
inst.const_vals4.items,
inst.const_vals8.items,
inst.const_vals16.items,
)) continue;
break m;
};
self.run();
if (inst.isFresh()) {
@branchHint(.unlikely);
const header = mem.bytesAsValue(
abi.SeenPcsHeader,
exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
);
_ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
inst.setFresh();
self.minimizeInput();
inst.updateSeen();
// An empty input has always been tried, so if an empty input is fresh then the
// test has to be non-deterministic. This has to be checked as duplicate empty
// entries are not allowed.
if (self.input.items.len - 8 == 0) {
std.log.warn("non-deterministic test (empty input produces different hits)", .{});
_ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
return;
}
const arena = self.arena_ctx.allocator();
const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
self.corpus.append(gpa, bytes) catch @panic("OOM");
self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
// Write new corpus to cache
var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
self.corpus_dir.writeFile(.{
.sub_path = std.fmt.bufPrint(
&name_buf,
"{x}",
.{self.corpus_dir_idx},
) catch unreachable,
.data = bytes,
}) catch |e| panic(
"failed to write corpus file '{x}': {t}",
.{ self.corpus_dir_idx, e },
);
self.corpus_dir_idx += 1;
}
}
};
/// Instrumentation must not be triggered before this function is called
export fn fuzzer_init(cache_dir_path: abi.Slice) void {
inst.depreinit();
exec = .init(cache_dir_path.toSlice());
inst = .init();
}
/// Invalid until `fuzzer_init` is called.
export fn fuzzer_coverage() abi.Coverage {
const coverage_id = exec.pc_digest;
const header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(exec.shared_seen_pcs.items.ptr));
var seen_count: usize = 0;
for (header.seenBits()) |chunk| {
seen_count += @popCount(chunk);
}
return .{
.id = coverage_id,
.runs = header.n_runs,
.unique = header.unique_runs,
.seen = seen_count,
};
}
/// fuzzer_init must be called beforehand
export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
current_test_name = unit_test_name.toSlice();
fuzzer = .init(test_one, unit_test_name.toSlice());
}
/// fuzzer_init_test must be called beforehand
/// The caller retains ownership of the memory of `bytes` and must not free it until the
/// fuzzer is finished.
export fn fuzzer_new_input(bytes: abi.Slice) void {
// An entry of length zero is always added and duplicates of it are not allowed.
if (bytes.len != 0)
fuzzer.addInput(bytes.toSlice());
}
/// fuzzer_init_test must be called first
export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
switch (limit_kind) {
.forever => while (true) fuzzer.cycle(),
.iterations => for (0..amount) |_| fuzzer.cycle(),
}
}
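/// Translates a runtime (relocated) address back to the virtual address in the binary
/// on disk by subtracting the module's load slide. For instance, if the loader mapped
/// this PIE 0x10000 bytes above its link-time base, an instrumented pc observed at
/// runtime address 0x100010f0 is reported to the build system as 0x100000f0.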
export fn fuzzer_unslide_address(addr: usize) usize {
const si = std.debug.getSelfDebugInfo() catch |err| {
std.debug.panic("failed to get self debug info: {t}", .{err});
};
const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
std.debug.panic("failed to find virtual address slide: {t}", .{err});
};
return addr - slide;
}
/// Helps determine run uniqueness in the face of recursion.
/// Currently not used by the fuzzer.
export threadlocal var __sancov_lowest_stack: usize = 0;
/// Inline since the return address of the callee is required
inline fn genericConstCmp(comptime T: type, val: T, comptime const_vals_field: []const u8) void {
if (!inst.constPcSeen(@returnAddress())) {
@branchHint(.unlikely);
@field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
}
}
export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
_ = const_arg;
_ = arg;
}
export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
_ = arg;
genericConstCmp(u16, const_arg, "const_vals2");
}
export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
_ = arg;
genericConstCmp(u32, const_arg, "const_vals4");
}
export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
_ = arg;
genericConstCmp(u64, const_arg, "const_vals8");
}
export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
_ = val;
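// Per the sanitizer coverage ABI, cases[0] holds the number of case values,
// cases[1] their bit width, and the case values themselves follow.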
if (!inst.constPcSeen(@returnAddress())) {
@branchHint(.unlikely);
const case_bits = cases[1];
const cases_slice = cases[2..][0..cases[0]];
switch (case_bits) {
// 8-bit cases are ignored because random byte mutations are likely to generate them anyway
0...8 => {},
9...16 => for (cases_slice) |c|
inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
17...32 => for (cases_slice) |c|
inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
33...64 => for (cases_slice) |c|
inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
else => {}, // Should be impossible
}
}
}
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
// Not valuable because we already have pc tracing via 8-bit counters.
_ = callee;
}
export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however, fuzzer_init() does not need this information since it directly reads from the
// symbol table.
_ = start;
_ = end;
}
export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however, fuzzer_init() does not need this information since it directly reads from the
// symbol table.
_ = start;
_ = end;
}
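// The `std.mem` copy helpers do not accept volatile slices, hence these local variants.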
/// Copy all of source into dest at position 0.
/// If the slices overlap, dest.ptr must be <= src.ptr.
fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
for (dest, source) |*d, s| d.* = s;
}
/// Copy all of source into dest at position 0.
/// If the slices overlap, dest.ptr must be >= src.ptr.
fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
var i = source.len;
while (i > 0) {
i -= 1;
dest[i] = source[i];
}
}
const Mutation = enum {
/// Applies to .insert_*_span and .push_*_span
/// For WTF-8, this limits code units, not code points
const max_insert_len = 12;
/// Applies to .insert_large_*_span and .push_large_*_span
/// 4096 is used as it is a common sector size
const max_large_insert_len = 4096;
/// Applies to .delete_span and .pop_span
const max_delete_len = 16;
/// Applies to .set_*_span, .move_span, and .set_existing_span
const max_set_len = 12;
const max_replicate_len = 64;
const AddValue = i6;
const SmallValue = i10;
delete_byte,
delete_span,
/// Removes the last byte from the input
pop_byte,
pop_span,
/// Inserts a group of bytes which is already in the input and removes the original copy.
move_span,
/// Replaces a group of bytes in the input with another group of bytes in the input
set_existing_span,
insert_existing_span,
push_existing_span,
set_rng_byte,
set_rng_span,
insert_rng_byte,
insert_rng_span,
/// Adds a byte to the end of the input
push_rng_byte,
push_rng_span,
set_zero_byte,
set_zero_span,
insert_zero_byte,
insert_zero_span,
push_zero_byte,
push_zero_span,
/// Appends a large run of zeros to the end of the input
/// This is intended to work with fuzz tests that require data in (large) blocks
push_large_zero_span,
/// Inserts a group of ASCII printable characters
insert_print_span,
/// Inserts a group of characters from a...z, A...Z, 0...9, _, and ' '
insert_common_span,
/// Inserts a group of ASCII digits possibly preceded by a `-`
insert_integer,
/// Code units are evenly distributed between one and four
insert_wtf8_char,
insert_wtf8_span,
/// Inserts a group of bytes from another input
insert_splice_span,
// utf16 is not yet included since insertion of random bytes should adequately check
// BMP characters, surrogate handling, and occasionally characters outside of the BMP.
set_print_span,
set_common_span,
set_splice_span,
/// Similar to set_splice_span, but the bytes are copied to the same index instead of a random one
replicate_splice_span,
push_print_span,
push_common_span,
push_integer,
push_wtf8_char,
push_wtf8_span,
push_splice_span,
/// Clears a random number of high bits of a byte
truncate_8,
truncate_16le,
truncate_16be,
truncate_32le,
truncate_32be,
truncate_64le,
truncate_64be,
/// Flips a random bit
xor_1,
/// Flips up to three bits of a byte, biased toward fewer bits
xor_few_8,
/// Flips up to six bits of a 16-bit value, biased toward fewer bits
xor_few_16,
/// Flips up to nine bits of a 32-bit value, biased toward fewer bits
xor_few_32,
/// Flips up to twelve bits of a 64-bit value, biased toward fewer bits
xor_few_64,
/// Adds to a byte a value of type AddValue
add_8,
add_16le,
add_16be,
add_32le,
add_32be,
add_64le,
add_64be,
/// Sets a 16-bit little-endian value to a value of type SmallValue
set_small_16le,
set_small_16be,
set_small_32le,
set_small_32be,
set_small_64le,
set_small_64be,
insert_small_16le,
insert_small_16be,
insert_small_32le,
insert_small_32be,
insert_small_64le,
insert_small_64be,
push_small_16le,
push_small_16be,
push_small_32le,
push_small_32be,
push_small_64le,
push_small_64be,
set_const_16,
set_const_32,
set_const_64,
set_const_128,
insert_const_16,
insert_const_32,
insert_const_64,
insert_const_128,
push_const_16,
push_const_32,
push_const_64,
push_const_128,
/// Sets a byte with up to three bits set, biased toward fewer bits
set_few_8,
/// Sets a 16-bit value with up to six bits set, biased toward fewer bits
set_few_16,
/// Sets a 32-bit value with up to nine bits set, biased toward fewer bits
set_few_32,
/// Sets a 64-bit value with up to twelve bits set, biased toward fewer bits
set_few_64,
insert_few_8,
insert_few_16,
insert_few_32,
insert_few_64,
push_few_8,
push_few_16,
push_few_32,
push_few_64,
/// Randomizes a random contiguous group of bits in a byte
packed_set_rng_8,
packed_set_rng_16le,
packed_set_rng_16be,
packed_set_rng_32le,
packed_set_rng_32be,
packed_set_rng_64le,
packed_set_rng_64be,
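/// Returns a value with between 1 and `bits` randomly chosen bits set (positions may
/// repeat, so the number of distinct set bits can be lower).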
fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
var result: T = 0;
var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
while (remaining_bits > 0) {
result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
remaining_bits -= 1;
}
return result;
}
/// Returns whether the mutation was applicable to the input
pub fn mutate(
mutation: Mutation,
rng: std.Random,
in: []const u8,
out: *MemoryMappedList,
corpus: []const []const u8,
const_vals2: []const u16,
const_vals4: []const u32,
const_vals8: []const u64,
const_vals16: []const u128,
) bool {
out.clearRetainingCapacity();
const new_capacity = 8 + in.len + @max(
16, // builtin 128 value
Mutation.max_insert_len,
Mutation.max_large_insert_len,
);
out.ensureTotalCapacity(new_capacity) catch |e|
panic("could not resize shared input file: {t}", .{e});
out.items.len = 8; // Length field
const applied = switch (mutation) {
inline else => |m| m.comptimeMutate(
rng,
in,
out,
corpus,
const_vals2,
const_vals4,
const_vals8,
const_vals16,
),
};
if (!applied)
assert(out.items.len == 8)
else
assert(out.items.len <= new_capacity);
return applied;
}
/// Assumes out has already been cleared
fn comptimeMutate(
comptime mutation: Mutation,
rng: std.Random,
in: []const u8,
out: *MemoryMappedList,
corpus: []const []const u8,
const_vals2: []const u16,
const_vals4: []const u32,
const_vals8: []const u64,
const_vals16: []const u128,
) bool {
const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
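// class_ctx layout per class:
// .new => .{ op, min input len, data source, data context },
// .remove => .{ op, max removed len },
// .rmw => .{ op, integer type, endianness, xor bit count ({} when unused) }.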
const class: Class, const class_ctx = switch (mutation) {
// zig fmt: off
.move_span => .{ .move_span, null },
.replicate_splice_span => .{ .replicate_splice_span, null },
.delete_byte => .{ .remove, .{ .delete, 1 } },
.delete_span => .{ .remove, .{ .delete, max_delete_len } },
.pop_byte => .{ .remove, .{ .pop, 1 } },
.pop_span => .{ .remove, .{ .pop, max_delete_len } },
.set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
.set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
.set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
.set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
.set_common_span => .{ .new, .{ .set , 1, .common , .many } },
.set_print_span => .{ .new, .{ .set , 1, .print , .many } },
.set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
.set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
.set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
.set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
.set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
.set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
.set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
.set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
.set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
.set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
.set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
.set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
.set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
.set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
.set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
.set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
.insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
.insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
.insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
.insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
.insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
.insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
.insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
.insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
.insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
.insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
.insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
.insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
.insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
.insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
.insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
.insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
.insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
.insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
.insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
.insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
.insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
.insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
.insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
.insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
.insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
.push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
.push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
.push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
.push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
.push_print_span => .{ .new, .{ .push , 0, .print , .many } },
.push_common_span => .{ .new, .{ .push , 0, .common , .many } },
.push_integer => .{ .new, .{ .push , 0, .integer , .many } },
.push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
.push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
.push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
.push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
.push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
.push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
.push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
.push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
.push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
.push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
.push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
.push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
.push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
.push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
.push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
.push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
.push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
.push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
.push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
.xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
.xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
.xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
.xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
.xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
.truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
.truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
.truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
.truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
.truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
.truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
.truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
.add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
.add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
.add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
.add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
.add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
.add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
.add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
.packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
.packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
.packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
.packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
.packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
.packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
.packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
// zig fmt: on
};
switch (class) {
.new => {
const op: enum {
set,
insert,
push,
pub fn maxLen(comptime op: @This(), in_len: usize) usize {
return switch (op) {
.set => @min(in_len, max_set_len),
.insert, .push => max_insert_len,
};
}
}, const min_in_len, const data: enum {
rng,
zero,
common,
print,
integer,
wtf8,
existing,
splice,
@"const",
small,
few,
}, const data_ctx = class_ctx;
const Size = enum { one, many, large };
if (in.len < min_in_len) return false;
if (data == .@"const" and data_ctx.len == 0) return false;
const splice_i = if (data == .splice) blk: {
// Element zero always holds an empty input, so we do not select it
if (corpus.len == 1) return false;
break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
} else undefined;
// Only .set requires len to be bounded by the current input length
const len = switch (data) {
else => switch (@as(Size, data_ctx)) {
.one => 1,
.many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
.large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
},
.wtf8 => undefined, // varies by size of each code unit
.splice => rng.intRangeAtMostBiased(usize, 1, @min(
corpus[splice_i].len,
op.maxLen(in.len),
)),
.existing => rng.intRangeAtMostBiased(usize, 1, @min(
in.len,
op.maxLen(in.len),
)),
.@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
.small, .few => @sizeOf(data_ctx[0]),
};
const i = switch (op) {
.set => rng.uintAtMostBiased(usize, in.len - len),
.insert => rng.uintAtMostBiased(usize, in.len),
.push => in.len,
};
out.appendSliceAssumeCapacity(in[0..i]);
switch (data) {
.rng => {
var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
rng.bytes(bytes[0..len]);
out.appendSliceAssumeCapacity(bytes[0..len]);
},
.zero => out.appendNTimesAssumeCapacity(0, len),
.common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
c.* = switch (rng.int(u6)) {
0 => ' ',
1...10 => |x| '0' + (@as(u8, x) - 1),
11...36 => |x| 'A' + (@as(u8, x) - 11),
37 => '_',
38...63 => |x| 'a' + (@as(u8, x) - 38),
};
},
.print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
},
.integer => {
const negative = len != 0 and rng.boolean();
if (negative) {
out.appendAssumeCapacity('-');
}
for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
c.* = rng.intRangeAtMostBiased(u8, '0', '9');
}
},
.wtf8 => {
comptime assert(op != .set);
var codepoints: usize = if (data_ctx == .one)
1
else
rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
while (true) {
const units1 = rng.int(u2);
const value = switch (units1) {
0 => rng.int(u7),
1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
};
const units = @as(u3, units1) + 1;
var buf: [4]u8 = undefined;
assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
out.appendSliceAssumeCapacity(buf[0..units]);
codepoints -= 1;
if (codepoints == 0) break;
}
},
.existing => {
const j = rng.uintAtMostBiased(usize, in.len - len);
out.appendSliceAssumeCapacity(in[j..][0..len]);
},
.splice => {
const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
},
.@"const" => out.appendSliceAssumeCapacity(@ptrCast(
&data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
)),
.small => out.appendSliceAssumeCapacity(@ptrCast(
&mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
)),
.few => out.appendSliceAssumeCapacity(@ptrCast(
&fewValue(rng, data_ctx[0], data_ctx[1]),
)),
}
switch (op) {
.set => out.appendSliceAssumeCapacity(in[i + len ..]),
.insert => out.appendSliceAssumeCapacity(in[i..]),
.push => {},
}
},
.remove => {
if (in.len == 0) return false;
const Op = enum { delete, pop };
const op: Op, const max_len = class_ctx;
// LessThan is used so we don't delete the entire input (which is unproductive since
// an empty input has always been tried)
const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
usize,
@min(max_len + 1, in.len),
);
switch (op) {
.delete => {
const i = rng.uintAtMostBiased(usize, in.len - len);
out.appendSliceAssumeCapacity(in[0..i]);
out.appendSliceAssumeCapacity(in[i + len ..]);
},
.pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
}
},
.rmw => {
const Op = enum { xor, truncate, add, packed_rng };
const op: Op, const T, const endian, const xor_bits = class_ctx;
if (in.len < @sizeOf(T)) return false;
const Log2T = math.Log2Int(T);
const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
const new = switch (op) {
.xor => old ^ fewValue(rng, T, xor_bits),
.truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
.add => old +% addend: {
const val = rng.int(Mutation.AddValue);
break :addend if (val == 0) 1 else val;
},
.packed_rng => blk: {
const bits = rng.int(math.Log2Int(T)) +| 1;
break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
},
};
out.appendSliceAssumeCapacity(in);
mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
mem.nativeTo(T, new, endian);
},
.move_span => {
if (in.len < 2) return false;
// One less since moving the whole input will never change anything
const len = rng.intRangeAtMostBiased(usize, 1, @min(
in.len - 1,
Mutation.max_set_len,
));
const src = rng.uintAtMostBiased(usize, in.len - len);
// This indexes into the final input
const dst = blk: {
const res = rng.uintAtMostBiased(usize, in.len - len - 1);
break :blk res + @intFromBool(res >= src);
};
if (src < dst) {
out.appendSliceAssumeCapacity(in[0..src]);
out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
out.appendSliceAssumeCapacity(in[src..][0..len]);
out.appendSliceAssumeCapacity(in[dst + len ..]);
} else {
out.appendSliceAssumeCapacity(in[0..dst]);
out.appendSliceAssumeCapacity(in[src..][0..len]);
out.appendSliceAssumeCapacity(in[dst..src]);
out.appendSliceAssumeCapacity(in[src + len ..]);
}
},
.replicate_splice_span => {
if (in.len == 0) return false;
if (corpus.len == 1) return false;
const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
out.appendSliceAssumeCapacity(in[0..i]);
out.appendSliceAssumeCapacity(from[i..][0..len]);
out.appendSliceAssumeCapacity(in[i + len ..]);
},
}
return true;
}
};
/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
pub const MemoryMappedList = struct {
/// Contents of the list.
///
/// Pointers to elements in this slice are invalidated by various functions
/// of this list in accordance with the respective documentation. In all
/// cases, "invalidated" means that the memory has been unmapped or remapped.
items: []align(std.heap.page_size_min) volatile u8,
/// How many bytes this list can hold without allocating additional memory.
capacity: usize,
/// The file is kept open so that it can be resized.
file: std.fs.File,
pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
const ptr = try std.posix.mmap(
null,
capacity,
std.posix.PROT.READ | std.posix.PROT.WRITE,
.{ .TYPE = .SHARED },
file.handle,
0,
);
return .{
.file = file,
.items = ptr[0..length],
.capacity = capacity,
};
}
pub fn create(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
try file.setEndPos(capacity);
return init(file, length, capacity);
}
pub fn deinit(l: *MemoryMappedList) void {
l.file.close();
std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
l.* = undefined;
}
/// Modify the array so that it can hold at least `additional_count` **more** items.
/// Invalidates element pointers if additional memory is needed.
pub fn ensureUnusedCapacity(l: *MemoryMappedList, additional_count: usize) !void {
return l.ensureTotalCapacity(l.items.len + additional_count);
}
/// If the current capacity is less than `new_capacity`, this function will
/// modify the array so that it can hold at least `new_capacity` items.
/// Invalidates element pointers if additional memory is needed.
pub fn ensureTotalCapacity(l: *MemoryMappedList, new_capacity: usize) !void {
if (l.capacity >= new_capacity) return;
const better_capacity = growCapacity(l.capacity, new_capacity);
return l.ensureTotalCapacityPrecise(better_capacity);
}
pub fn ensureTotalCapacityPrecise(l: *MemoryMappedList, new_capacity: usize) !void {
if (l.capacity >= new_capacity) return;
std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
try l.file.setEndPos(new_capacity);
l.* = try init(l.file, l.items.len, new_capacity);
}
/// Invalidates all element pointers.
pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
l.items.len = 0;
}
/// Append the slice of items to the list.
/// Asserts that the list can hold the additional items.
pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
const old_len = l.items.len;
const new_len = old_len + items.len;
assert(new_len <= l.capacity);
l.items.len = new_len;
@memcpy(l.items[old_len..][0..items.len], items);
}
/// Extends the list by 1 element.
/// Never invalidates element pointers.
/// Asserts that the list can hold one additional item.
pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
const new_item_ptr = l.addOneAssumeCapacity();
new_item_ptr.* = item;
}
/// Increase length by 1, returning pointer to the new item.
/// The returned pointer becomes invalid when the list is resized.
/// Never invalidates element pointers.
/// Asserts that the list can hold one additional item.
pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
assert(l.items.len < l.capacity);
l.items.len += 1;
return &l.items[l.items.len - 1];
}
/// Append a value to the list `n` times.
/// Never invalidates element pointers.
/// The function is inline so that a comptime-known `value` parameter will
/// have better memset codegen in case it has a repeated byte pattern.
/// Asserts that the list can hold the additional items.
pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
const new_len = l.items.len + n;
assert(new_len <= l.capacity);
@memset(l.items.ptr[l.items.len..new_len], value);
l.items.len = new_len;
}
/// Resize the array, adding `n` new elements, which have `undefined` values.
/// The return value is a slice pointing to the newly allocated elements.
/// Never invalidates element pointers.
/// The returned pointer becomes invalid when the list is resized.
/// Asserts that the list can hold the additional items.
pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
assert(l.items.len + n <= l.capacity);
const prev_len = l.items.len;
l.items.len += n;
return l.items[prev_len..][0..n];
}
/// Called when memory growth is necessary. Returns a capacity larger than
/// minimum that grows super-linearly.
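/// Assuming 4 KiB pages, e.g. growCapacity(8192, 9000) returns 12288.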
fn growCapacity(current: usize, minimum: usize) usize {
var new = current;
while (true) {
new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
if (new >= minimum) return new;
}
}
pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
assert(l.items.len + 1 <= l.capacity);
l.items.len += 1;
volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
l.items[i] = item;
}
pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
assert(i < l.items.len);
const old = l.items[i];
volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
l.items.len -= 1;
return old;
}
};