greatly improve capabilities of the fuzzer

This PR significantly improves the capabilities of the fuzzer.

The changes made to the fuzzer to accomplish this feat mostly include
tracking memory reads from .rodata to determine fresh inputs, new
mutations (especially the ones that insert const values from .rodata
reads and __sanitizer_cov_trace_const_cmp), and minimizing found inputs.
Additionally, the number of runs per second has been greatly increased
by generating smaller inputs and avoiding clearing the 8-bit pc counters.

An additional feature added is that the length of the input file is now
stored and the old input file is rerun upon start.

Other changes made to the fuzzer include more logical initialization,
using one shared file `in` for inputs, creating corpus files with
proper sizes, and using hexadecimal-numbered corpus files for
simplicity.

Furthermore, I added several new fuzz tests to gauge the fuzzer's
efficiency. I also tried to add a test for zstandard decompression,
which the fuzzer crashed within 60,000 runs (less than a second).

Bug fixes include:
* Fixed a race condition when multiple fuzzer processes needed to use
the same coverage file.
* Web interface stats now update even when the number of unique runs is
not changing.
* Fixed tokenizer.testPropertiesUpheld to allow stray carriage returns
since they are valid whitespace.
This commit is contained in:
Kendall Condon 2025-05-01 16:52:56 -04:00
parent a0ec4e270e
commit e66b269333
7 changed files with 1510 additions and 539 deletions

View File

@ -4,6 +4,7 @@ const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const assert = std.debug.assert;
const fuzz_abi = std.Build.abi.fuzz;
pub const std_options: std.Options = .{
.logFn = log,
@ -57,7 +58,7 @@ pub fn main() void {
fba.reset();
if (builtin.fuzz) {
const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument");
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
fuzz_abi.fuzzer_init(.fromSlice(cache_dir));
}
if (listen) {
@ -78,7 +79,7 @@ fn mainServer() !void {
});
if (builtin.fuzz) {
const coverage_id = fuzzer_coverage_id();
const coverage_id = fuzz_abi.fuzzer_coverage_id();
try server.serveU64Message(.coverage_id, coverage_id);
}
@ -152,14 +153,19 @@ fn mainServer() !void {
});
},
.start_fuzzing => {
// This ensures that this code won't be analyzed and hence reference fuzzer symbols
// since they are not present.
if (!builtin.fuzz) unreachable;
const index = try server.receiveBody_u32();
const test_fn = builtin.test_functions[index];
const entry_addr = @intFromPtr(test_fn.func);
try server.serveU64Message(.fuzz_start_addr, entry_addr);
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
is_fuzz_test = false;
fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
fuzz_test_index = index;
test_fn.func() catch |err| switch (err) {
error.SkipZigTest => return,
else => {
@ -184,6 +190,8 @@ fn mainServer() !void {
fn mainTerminal() void {
@disableInstrumentation();
if (builtin.fuzz) @panic("fuzz test requires server");
const test_fn_list = builtin.test_functions;
var ok_count: usize = 0;
var skip_count: usize = 0;
@ -333,28 +341,8 @@ pub fn mainSimple() anyerror!void {
if (failed != 0) std.process.exit(1);
}
const FuzzerSlice = extern struct {
ptr: [*]const u8,
len: usize,
/// Inline to avoid fuzzer instrumentation.
inline fn toSlice(s: FuzzerSlice) []const u8 {
return s.ptr[0..s.len];
}
/// Inline to avoid fuzzer instrumentation.
inline fn fromSlice(s: []const u8) FuzzerSlice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
var is_fuzz_test: bool = undefined;
extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.c) void) void;
extern fn fuzzer_coverage_id() u64;
var fuzz_test_index: u32 = undefined;
pub fn fuzz(
context: anytype,
@ -385,12 +373,12 @@ pub fn fuzz(
const global = struct {
var ctx: @TypeOf(context) = undefined;
fn fuzzer_one(input_ptr: [*]const u8, input_len: usize) callconv(.c) void {
fn test_one(input: fuzz_abi.Slice) callconv(.c) void {
@disableInstrumentation();
testing.allocator_instance = .{};
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
log_err_count = 0;
testOne(ctx, input_ptr[0..input_len]) catch |err| switch (err) {
testOne(ctx, input.toSlice()) catch |err| switch (err) {
error.SkipZigTest => return,
else => {
std.debug.lockStdErr();
@ -411,10 +399,11 @@ pub fn fuzz(
testing.allocator_instance = .{};
defer testing.allocator_instance = prev_allocator_state;
for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len);
global.ctx = context;
fuzzer_start(&global.fuzzer_one);
fuzz_abi.fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
for (options.corpus) |elem|
fuzz_abi.fuzzer_new_input(.fromSlice(elem));
fuzz_abi.fuzzer_main();
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -252,9 +252,8 @@ pub fn sendUpdate(
const seen_pcs = cov_header.seenBits();
const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic);
const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic);
if (prev.unique_runs != unique_runs) {
// There has been an update.
if (prev.unique_runs == 0) {
{
if (unique_runs != 0 and prev.unique_runs == 0) {
// We need to send initial context.
const header: abi.SourceIndexHeader = .{
.directories_len = @intCast(coverage_map.coverage.directories.entries.len),

View File

@ -138,6 +138,26 @@ pub const Rebuild = extern struct {
/// ABI bits specifically relating to the fuzzer interface.
pub const fuzz = struct {
/// C-calling-convention callback that receives one fuzz input as a `Slice`.
/// This is the type of the `test_one` argument of `fuzzer_init_test`.
pub const TestOne = *const fn (Slice) callconv(.c) void;
/// Entry point that receives the cache directory path.
/// NOTE(review): callers in this change invoke it before every other
/// `fuzzer_*` function — presumably required; confirm against the fuzzer.
pub extern fn fuzzer_init(cache_dir_path: Slice) void;
/// Returns a 64-bit coverage identifier. Callers in this change forward it to
/// the build server and, hex-formatted, use it as a file name under `v/` in
/// the cache directory.
pub extern fn fuzzer_coverage_id() u64;
/// Registers the per-input callback together with the unit test's name.
pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
/// Hands one input to the fuzzer. Callers in this change use it for each
/// corpus element, after `fuzzer_init_test`.
pub extern fn fuzzer_new_input(bytes: Slice) void;
/// Called by the test runner after the corpus has been supplied.
/// NOTE(review): presumably runs the fuzzing loop — confirm against the fuzzer.
pub extern fn fuzzer_main() void;
/// Pointer-plus-length pair describing a run of read-only bytes. Declared as
/// an `extern struct` so it can be used in the `extern fn` signatures of the
/// fuzzer interface.
pub const Slice = extern struct {
ptr: [*]const u8,
len: usize,
/// Views the pointer/length pair as a Zig slice; no bytes are copied.
pub fn toSlice(s: Slice) []const u8 {
return s.ptr[0..s.len];
}
/// Packs a Zig slice into its pointer/length pair; no bytes are copied, so
/// the result refers to the same memory as `s`.
pub fn fromSlice(s: []const u8) Slice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
/// libfuzzer uses this and its usize is the one that counts. To match the ABI,
/// make the ints be the size of the target used with libfuzzer.
///

View File

@ -1115,7 +1115,7 @@ pub const Object = struct {
.NoPrune = false,
// Workaround for https://github.com/llvm/llvm-project/pull/106464
.StackDepth = true,
.TraceLoads = false,
.TraceLoads = options.fuzz,
.TraceStores = false,
.CollectControlFlow = false,
},

View File

@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.fuzz = true,
}),
.use_llvm = true, // #23423
});
b.installArtifact(exe);

View File

@ -1,29 +1,43 @@
const std = @import("std");
const abi = std.Build.abi.fuzz;
const native_endian = @import("builtin").cpu.arch.endian();
const FuzzerSlice = extern struct {
ptr: [*]const u8,
len: usize,
fn fromSlice(s: []const u8) FuzzerSlice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
extern fn fuzzer_coverage_id() u64;
/// Per-input callback that `main` registers through `abi.fuzzer_init_test`.
/// Its only work is to hand the input bytes to `std.debug.assertReadable`.
fn testOne(in: abi.Slice) callconv(.c) void {
std.debug.assertReadable(in.toSlice());
}
pub fn main() !void {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
defer _ = gpa.deinit();
const args = try std.process.argsAlloc(gpa.allocator());
defer std.process.argsFree(gpa.allocator(), args);
var debug_gpa_ctx: std.heap.DebugAllocator(.{}) = .init;
defer _ = debug_gpa_ctx.deinit();
const gpa = debug_gpa_ctx.allocator();
const cache_dir = args[1];
var args = try std.process.argsWithAllocator(gpa);
defer args.deinit();
_ = args.skip(); // executable name
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
fuzzer_init_corpus_elem("hello".ptr, "hello".len);
fuzzer_set_name("test".ptr, "test".len);
_ = fuzzer_coverage_id();
const cache_dir_path = args.next() orelse @panic("expected cache directory path argument");
var cache_dir = try std.fs.cwd().openDir(cache_dir_path, .{});
defer cache_dir.close();
abi.fuzzer_init(.fromSlice(cache_dir_path));
abi.fuzzer_init_test(testOne, .fromSlice("test"));
abi.fuzzer_new_input(.fromSlice(""));
abi.fuzzer_new_input(.fromSlice("hello"));
const pc_digest = abi.fuzzer_coverage_id();
const coverage_file_path = "v/" ++ std.fmt.hex(pc_digest);
const coverage_file = try cache_dir.openFile(coverage_file_path, .{});
defer coverage_file.close();
var read_buf: [@sizeOf(abi.SeenPcsHeader)]u8 = undefined;
var r = coverage_file.reader(&read_buf);
const pcs_header = r.interface.takeStruct(abi.SeenPcsHeader, native_endian) catch return r.err.?;
if (pcs_header.pcs_len == 0)
return error.ZeroPcs;
const expected_len = @sizeOf(abi.SeenPcsHeader) +
try std.math.divCeil(usize, pcs_header.pcs_len, @bitSizeOf(usize)) * @sizeOf(usize) +
pcs_header.pcs_len * @sizeOf(usize);
if (try coverage_file.getEndPos() != expected_len)
return error.WrongEnd;
}