Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deduplicate OnDisk Corpus #2827

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }
which = "6.0.3"
windows = "0.59.0"
z3 = "0.12.1"

fs2 = "0.4.3" # Used by OnDisk Corpus for file locking

[workspace.lints.rust]
# Forbid
Expand Down
15 changes: 2 additions & 13 deletions fuzzers/baby/tutorial/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
use std::hash::Hash;

use lain::prelude::*;
use libafl::{
corpus::CorpusId,
inputs::{HasTargetBytes, Input},
};
use libafl::inputs::{HasTargetBytes, Input};
use libafl_bolts::{ownedref::OwnedSlice, HasLen};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -48,15 +45,7 @@ pub enum PacketType {
Reset = 0x2,
}

impl Input for PacketData {
fn generate_name(&self, id: Option<CorpusId>) -> String {
if let Some(id) = id {
format!("id_{}", id.0)
} else {
"id_unknown".into()
}
}
}
impl Input for PacketData {}

impl HasTargetBytes for PacketData {
#[inline]
Expand Down
9 changes: 2 additions & 7 deletions fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ use core::num::NonZeroUsize;
use std::{borrow::Cow, hash::Hash};

use libafl::{
corpus::CorpusId,
generators::{Generator, RandBytesGenerator},
inputs::{BytesInput, HasTargetBytes, Input},
mutators::{MutationResult, Mutator},
state::HasRand,
Error, SerdeAny,
};
use libafl_bolts::{generic_hash_std, rands::Rand, Named};
use libafl_bolts::{rands::Rand, Named};
use serde::{Deserialize, Serialize};

/// The custom [`Input`] type used in this example, consisting of a byte array part, a byte array that is not always present, and a boolean
Expand All @@ -28,11 +27,7 @@ pub struct CustomInput {
}

/// Hash-based implementation
impl Input for CustomInput {
fn generate_name(&self, _id: Option<CorpusId>) -> String {
format!("{:016x}", generic_hash_std(self))
}
}
impl Input for CustomInput {}

impl CustomInput {
/// Returns a mutable reference to the byte array
Expand Down
3 changes: 3 additions & 0 deletions libafl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ std = [
"libafl_bolts/std",
"typed-builder",
"fastbloom",
"fs2",
]

## Tracks the Feedbacks and the Objectives that were interesting for a Testcase
Expand Down Expand Up @@ -284,6 +285,8 @@ const_panic = { version = "0.2.9", default-features = false } # similarly, for f
pyo3 = { workspace = true, optional = true }
regex-syntax = { version = "0.8.4", optional = true } # For nautilus

fs2 = { workspace = true, optional = true } # used by OnDisk Corpus for file locking

# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
serial_test = { workspace = true, optional = true, default-features = false, features = [
"logging",
Expand Down
147 changes: 80 additions & 67 deletions libafl/src/corpus/inmemory_ondisk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`]
//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner.

use alloc::string::String;
use alloc::string::{String, ToString};
use core::cell::RefCell;
use std::{
fs,
fs::{File, OpenOptions},
io,
io::Write,
io::{Read, Write},
path::{Path, PathBuf},
};

use fs2::FileExt;
#[cfg(feature = "gzip")]
use libafl_bolts::compress::GzipCompressor;
use serde::{Deserialize, Serialize};
Expand All @@ -33,7 +34,11 @@ use crate::{
/// If the creation fails for _any_ reason, including, but not limited to, a preexisting file of that name,
/// it will instead return the respective [`io::Error`].
fn create_new<P: AsRef<Path>>(path: P) -> Result<File, io::Error> {
OpenOptions::new().write(true).create_new(true).open(path)
OpenOptions::new()
.write(true)
.read(true)
.create_new(true)
.open(path)
}

/// Tries to create the given `path` and returns `None` _only_ if the file already existed.
Expand Down Expand Up @@ -87,7 +92,7 @@ where
fn add(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
let id = self.inner.add(testcase)?;
let testcase = &mut self.get(id).unwrap().borrow_mut();
self.save_testcase(testcase, id)?;
self.save_testcase(testcase)?;
*testcase.input_mut() = None;
Ok(id)
}
Expand All @@ -97,7 +102,7 @@ where
fn add_disabled(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
let id = self.inner.add_disabled(testcase)?;
let testcase = &mut self.get_from_all(id).unwrap().borrow_mut();
self.save_testcase(testcase, id)?;
self.save_testcase(testcase)?;
*testcase.input_mut() = None;
Ok(id)
}
Expand All @@ -108,7 +113,7 @@ where
let entry = self.inner.replace(id, testcase)?;
self.remove_testcase(&entry)?;
let testcase = &mut self.get(id).unwrap().borrow_mut();
self.save_testcase(testcase, id)?;
self.save_testcase(testcase)?;
*testcase.input_mut() = None;
Ok(entry)
}
Expand Down Expand Up @@ -317,12 +322,14 @@ impl<I> InMemoryOnDiskCorpus<I> {

/// Sets the filename for a [`Testcase`].
/// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename.
/// Renaming testcases will most likely cause duplicate testcases to not be handled correctly
/// if testcases with the same input are not given the same filename.
/// Only rename when you know what you are doing.
#[inline]
pub fn rename_testcase(
&self,
testcase: &mut Testcase<I>,
filename: String,
) -> Result<(), Error> {
pub fn rename_testcase(&self, testcase: &mut Testcase<I>, filename: String) -> Result<(), Error>
where
I: Input,
{
if testcase.filename().is_some() {
// We are renaming!

Expand All @@ -335,36 +342,10 @@ impl<I> InMemoryOnDiskCorpus<I> {
return Ok(());
}

if self.locking {
let new_lock_filename = format!(".{new_filename}.lafl_lock");

// Try to create lock file for new testcases
if let Err(err) = create_new(self.dir_path.join(&new_lock_filename)) {
*testcase.filename_mut() = Some(old_filename);
return Err(Error::illegal_state(format!(
"Unable to create lock file {new_lock_filename} for new testcase: {err}"
)));
}
}

let new_file_path = self.dir_path.join(&new_filename);

fs::rename(testcase.file_path().as_ref().unwrap(), &new_file_path)?;

let new_metadata_path = {
if let Some(old_metadata_path) = testcase.metadata_path() {
// We have metadata. Let's rename it.
let new_metadata_path = self.dir_path.join(format!(".{new_filename}.metadata"));
fs::rename(old_metadata_path, &new_metadata_path)?;

Some(new_metadata_path)
} else {
None
}
};

*testcase.metadata_path_mut() = new_metadata_path;
self.remove_testcase(testcase)?;
*testcase.filename_mut() = Some(new_filename);
self.save_testcase(testcase)?;
*testcase.file_path_mut() = Some(new_file_path);

Ok(())
Expand All @@ -375,42 +356,54 @@ impl<I> InMemoryOnDiskCorpus<I> {
}
}

fn save_testcase(&self, testcase: &mut Testcase<I>, id: CorpusId) -> Result<(), Error>
fn save_testcase(&self, testcase: &mut Testcase<I>) -> Result<(), Error>
where
I: Input,
{
let file_name_orig = testcase.filename_mut().take().unwrap_or_else(|| {
let file_name = testcase.filename_mut().take().unwrap_or_else(|| {
// TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL)
testcase.input().as_ref().unwrap().generate_name(Some(id))
testcase.input().as_ref().unwrap().generate_name()
});

// New testcase, we need to save it.
let mut file_name = file_name_orig.clone();

let mut ctr = 2;
let file_name = if self.locking {
loop {
let lockfile_name = format!(".{file_name}.lafl_lock");
let lockfile_path = self.dir_path.join(lockfile_name);

if try_create_new(lockfile_path)?.is_some() {
break file_name;
}

file_name = format!("{file_name_orig}-{ctr}");
ctr += 1;
let mut ctr = String::new();
if self.locking {
let lockfile_name = format!(".{file_name}");
let lockfile_path = self.dir_path.join(lockfile_name);

let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or(
OpenOptions::new()
.write(true)
.read(true)
.open(&lockfile_path)?,
);
lockfile.lock_exclusive()?;

lockfile.read_to_string(&mut ctr)?;
ctr = ctr.trim().to_string();
if ctr.is_empty() {
ctr = String::from("1");
} else {
ctr = (ctr.parse::<u32>()? + 1).to_string();
}
} else {
file_name
};

lockfile.write_all(ctr.as_bytes())?;
}

if testcase.file_path().is_none() {
*testcase.file_path_mut() = Some(self.dir_path.join(&file_name));
}
*testcase.filename_mut() = Some(file_name);

if self.meta_format.is_some() {
let metafile_name = format!(".{}.metadata", testcase.filename().as_ref().unwrap());
let metafile_name = if self.locking {
format!(
".{}_{}.metadata",
testcase.filename().as_ref().unwrap(),
ctr
)
} else {
format!(".{}.metadata", testcase.filename().as_ref().unwrap())
};
let metafile_path = self.dir_path.join(&metafile_name);
let mut tmpfile_path = metafile_path.clone();
tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",));
Expand Down Expand Up @@ -453,15 +446,35 @@ impl<I> InMemoryOnDiskCorpus<I> {

fn remove_testcase(&self, testcase: &Testcase<I>) -> Result<(), Error> {
if let Some(filename) = testcase.filename() {
let mut ctr = String::new();
if self.locking {
let lockfile_path = self.dir_path.join(format!(".{filename}"));
let mut lockfile = OpenOptions::new()
.write(true)
.read(true)
.open(&lockfile_path)?;

lockfile.lock_exclusive()?;
lockfile.read_to_string(&mut ctr)?;
ctr = ctr.trim().to_string();

if ctr == "1" {
FileExt::unlock(&lockfile)?;
drop(fs::remove_file(lockfile_path));
} else {
lockfile.write_all(&(ctr.parse::<u32>()? - 1).to_le_bytes())?;
return Ok(());
}
}

fs::remove_file(self.dir_path.join(filename))?;
if self.meta_format.is_some() {
fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?;
if self.locking {
fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?;
} else {
fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?;
}
}
// also try to remove the corresponding `.lafl_lock` file if it still exists
// (even though it shouldn't exist anymore, at this point in time)
drop(fs::remove_file(
self.dir_path.join(format!(".{filename}.lafl_lock")),
));
}
Ok(())
}
Expand Down
2 changes: 2 additions & 0 deletions libafl/src/corpus/testcase.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ impl<I> Testcase<I> {
}

/// Create a new Testcase instance given an input and a `filename`.
/// If locking is enabled, make sure that testcases with the same input have the same filename
/// to prevent ending up with duplicate testcases.
#[inline]
pub fn with_filename(input: I, filename: String) -> Self {
Self {
Expand Down
2 changes: 1 addition & 1 deletion libafl/src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ where
fn name_detailed(&self) -> Cow<'static, str> {
match self {
Event::NewTestcase { input, .. } => {
Cow::Owned(format!("Testcase {}", input.generate_name(None)))
Cow::Owned(format!("Testcase {}", input.generate_name()))
}
Event::UpdateExecStats { .. } => Cow::Borrowed("Client Heartbeat"),
Event::UpdateUserStats { .. } => Cow::Borrowed("UserStats"),
Expand Down
2 changes: 1 addition & 1 deletion libafl/src/executors/hooks/unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ pub mod unix_signal_handler {
let mut bsod = Vec::new();
{
let mut writer = std::io::BufWriter::new(&mut bsod);
let _ = writeln!(writer, "input: {:?}", input.generate_name(None));
let _ = writeln!(writer, "input: {:?}", input.generate_name());
let bsod = libafl_bolts::minibsod::generate_minibsod(
&mut writer,
signal,
Expand Down
2 changes: 1 addition & 1 deletion libafl/src/executors/hooks/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ pub mod windows_exception_handler {
let mut bsod = Vec::new();
{
let mut writer = std::io::BufWriter::new(&mut bsod);
writeln!(writer, "input: {:?}", input.generate_name(None)).unwrap();
writeln!(writer, "input: {:?}", input.generate_name()).unwrap();
libafl_bolts::minibsod::generate_minibsod(&mut writer, exception_pointers)
.unwrap();
writer.flush().unwrap();
Expand Down
4 changes: 2 additions & 2 deletions libafl/src/inputs/encoded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use libafl_bolts::{Error, HasLen};
use regex::Regex;
use serde::{Deserialize, Serialize};

use crate::{corpus::CorpusId, inputs::Input};
use crate::inputs::Input;

/// Trait to encode bytes to an [`EncodedInput`] using the given [`Tokenizer`]
pub trait InputEncoder<T>
Expand Down Expand Up @@ -202,7 +202,7 @@ pub struct EncodedInput {
impl Input for EncodedInput {
/// Generate a name for this input
#[must_use]
fn generate_name(&self, _id: Option<CorpusId>) -> String {
fn generate_name(&self) -> String {
let mut hasher = RandomState::with_seeds(0, 0, 0, 0).build_hasher();
for code in &self.codes {
hasher.write(&code.to_le_bytes());
Expand Down
4 changes: 2 additions & 2 deletions libafl/src/inputs/gramatron.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use ahash::RandomState;
use libafl_bolts::{Error, HasLen};
use serde::{Deserialize, Serialize};

use crate::{corpus::CorpusId, inputs::Input};
use crate::inputs::Input;

/// A terminal for gramatron grammar fuzzing
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -44,7 +44,7 @@ pub struct GramatronInput {
impl Input for GramatronInput {
/// Generate a name for this input
#[must_use]
fn generate_name(&self, _id: Option<CorpusId>) -> String {
fn generate_name(&self) -> String {
let mut hasher = RandomState::with_seeds(0, 0, 0, 0).build_hasher();
for term in &self.terms {
hasher.write(term.symbol.as_bytes());
Expand Down
Loading
Loading