Skip to content

Commit 69dbdec

Browse files
committed
greatly improve capabilities of the fuzzer
This PR significantly improves the capabilities of the fuzzer. For comparison, here is a ten-minute head-to-head between the old and new fuzzer implementations (with newly included fuzz tests): -- Old -- ``` Total Runs: 49020931 Unique Runs: 1044131 (2.1%) Speed (Runs/Second): 81696 Coverage: 2069 / 15866 (13.0%) ``` (note: Unique Runs is highly inflated due to the inefficiency of the old implementation) -- New -- ``` Total Runs: 537039526 Unique Runs: 1511 (0.0%) Speed (Runs/Second): 894950 Coverage: 3000 / 15719 (19.1%) Examples: `while(C)i(){}else|` `{y:n()align(b)addrspace` `switch(P){else=>` `[:l]align(_:r:l)R` `(if(b){defer{nosuspend` `union(enum(I))` ``` NOTE: You have to rebuild the compiler due to new fuzzing instrumentation being enabled for memory loads. The changes made to the fuzzer to accomplish this feat mostly include tracking memory reads from .rodata to determine new runs, new mutations (especially the ones that insert const values from .rodata reads and __sanitizer_cov_trace_const_cmp), and minimizing found inputs. Additionally, the runs per second have been greatly increased due to generating smaller inputs and avoiding clearing the 8-bit pc counters. An additional feature added is that the length of the input file is now stored and the old input file is rerun upon start, though this does not close ziglang#20803 since it does not output the input (though it can be very easily retrieved from the cache directory). Other changes made to the fuzzer include more logical initialization, using one shared file `in` for inputs, creating corpus files with proper sizes, and using hexadecimal-numbered corpus files for simplicity. Additionally, volatile was removed from MemoryMappedList since all that is needed is a guarantee that the compiler has done the writes, which is already accomplished with atomic ordering. Furthermore, I added several new fuzz tests to gauge the fuzzer's efficiency. 
I also tried to add a test for zstandard decompression, which crashed within 60,000 runs (less than a second). Bug fixes include: * Fixed a race condition when multiple fuzzer processes needed to use the same coverage file. * Web interface stats now update even when unique runs is not changing. * Fixed tokenizer.testPropertiesUpheld to allow stray carriage returns since they are valid whitespace. * Closes ziglang#23180 POSSIBLE IMPROVEMENTS: * Remove the 8-bit pc counting code in favor of a call to a sanitizer function that updates a flag if a new pc hit happened (similar to how the __sanitizer_cov_load functions already operate). * Less basic input minimization function. It could also try splitting inputs into two between each byte to see if they both hit the same pcs. This is useful as smaller inputs are usually much more efficient. * Deterministic mutations when a new input is found. * Culling out corpus inputs that are redundant due to smaller inputs already hitting their pcs and memory addresses. * Applying multiple mutations during dry spells. * Prioritizing some corpus inputs. * Creating a list of the most successful input splices (which would likely contain grammar keywords) and creating a custom mutation for adding them. * Removing some less-efficient mutations. * Store effective mutations to the disk for the benefit of future runs. * Counting __sanitizer_cov `@returnAddress`es in determining unique runs. * Optimize __sanitizer_cov_trace_const_cmp methods (the use of an ArrayHashMap is not too fast). * Processor affinity * Exclude fuzzer's .rodata Nevertheless, I feel like the fuzzer is in a viable place to start being useful (as demonstrated in ziglang#23413).
1 parent 9f235a1 commit 69dbdec

File tree

10 files changed

+1427
-455
lines changed

10 files changed

+1427
-455
lines changed

lib/compiler/test_runner.zig

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const std = @import("std");
55
const io = std.io;
66
const testing = std.testing;
77
const assert = std.debug.assert;
8+
const fuzz_abi = std.Build.Fuzz.abi;
89

910
pub const std_options: std.Options = .{
1011
.logFn = log,
@@ -36,7 +37,6 @@ pub fn main() void {
3637

3738
var listen = false;
3839
var opt_cache_dir: ?[]const u8 = null;
39-
4040
for (args[1..]) |arg| {
4141
if (std.mem.eql(u8, arg, "--listen=-")) {
4242
listen = true;
@@ -53,7 +53,7 @@ pub fn main() void {
5353
fba.reset();
5454
if (builtin.fuzz) {
5555
const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument");
56-
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
56+
fuzzer_init(.fromSlice(cache_dir));
5757
}
5858

5959
if (listen) {
@@ -148,14 +148,19 @@ fn mainServer() !void {
148148
});
149149
},
150150
.start_fuzzing => {
151+
// This ensures that this code won't be analyzed and hence reference fuzzer symbols
152+
// since they are not present.
151153
if (!builtin.fuzz) unreachable;
154+
152155
const index = try server.receiveBody_u32();
153156
const test_fn = builtin.test_functions[index];
154157
const entry_addr = @intFromPtr(test_fn.func);
158+
155159
try server.serveU64Message(.fuzz_start_addr, entry_addr);
156160
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
157161
is_fuzz_test = false;
158-
fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
162+
fuzz_test_index = index;
163+
159164
test_fn.func() catch |err| switch (err) {
160165
error.SkipZigTest => return,
161166
else => {
@@ -180,6 +185,8 @@ fn mainServer() !void {
180185

181186
fn mainTerminal() void {
182187
@disableInstrumentation();
188+
if (builtin.fuzz) @panic("fuzz test requires server");
189+
183190
const test_fn_list = builtin.test_functions;
184191
var ok_count: usize = 0;
185192
var skip_count: usize = 0;
@@ -330,28 +337,15 @@ pub fn mainSimple() anyerror!void {
330337
if (failed != 0) std.process.exit(1);
331338
}
332339

333-
const FuzzerSlice = extern struct {
334-
ptr: [*]const u8,
335-
len: usize,
336-
337-
/// Inline to avoid fuzzer instrumentation.
338-
inline fn toSlice(s: FuzzerSlice) []const u8 {
339-
return s.ptr[0..s.len];
340-
}
341-
342-
/// Inline to avoid fuzzer instrumentation.
343-
inline fn fromSlice(s: []const u8) FuzzerSlice {
344-
return .{ .ptr = s.ptr, .len = s.len };
345-
}
346-
};
347-
348340
var is_fuzz_test: bool = undefined;
341+
var fuzz_test_index: u32 = undefined;
349342

350-
extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
351-
extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
352-
extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
353-
extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.c) void) void;
343+
extern fn fuzzer_init(cache_dir_path: fuzz_abi.Slice) void;
354344
extern fn fuzzer_coverage_id() u64;
345+
const FuzzerTestOne = *const fn (fuzz_abi.Slice) callconv(.c) void;
346+
extern fn fuzzer_init_test(test_one: FuzzerTestOne, unit_test_name: fuzz_abi.Slice) void;
347+
extern fn fuzzer_new_input(bytes: fuzz_abi.Slice) void;
348+
extern fn fuzzer_main() void;
355349

356350
pub fn fuzz(
357351
context: anytype,
@@ -382,12 +376,12 @@ pub fn fuzz(
382376
const global = struct {
383377
var ctx: @TypeOf(context) = undefined;
384378

385-
fn fuzzer_one(input_ptr: [*]const u8, input_len: usize) callconv(.c) void {
379+
fn test_one(input: fuzz_abi.Slice) callconv(.c) void {
386380
@disableInstrumentation();
387381
testing.allocator_instance = .{};
388382
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
389383
log_err_count = 0;
390-
testOne(ctx, input_ptr[0..input_len]) catch |err| switch (err) {
384+
testOne(ctx, input.toSlice()) catch |err| switch (err) {
391385
error.SkipZigTest => return,
392386
else => {
393387
std.debug.lockStdErr();
@@ -408,10 +402,10 @@ pub fn fuzz(
408402
testing.allocator_instance = .{};
409403
defer testing.allocator_instance = prev_allocator_state;
410404

411-
for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len);
412-
405+
fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
406+
for (options.corpus) |elem| fuzzer_new_input(.fromSlice(elem));
413407
global.ctx = context;
414-
fuzzer_start(&global.fuzzer_one);
408+
fuzzer_main();
415409
return;
416410
}
417411

0 commit comments

Comments
 (0)