From 0c4976adfa1d502c7c3baec9329d3e5a2446e47c Mon Sep 17 00:00:00 2001 From: Nathaniel Brough Date: Sat, 23 Dec 2023 17:04:31 -0800 Subject: [PATCH 1/3] Add gix-commitgraph::File fuzzer --- gix-commitgraph/fuzz/.gitignore | 4 +++ gix-commitgraph/fuzz/Cargo.toml | 30 +++++++++++++++++++ .../fuzz/fuzz_targets/fuzz_file.rs | 29 ++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 gix-commitgraph/fuzz/.gitignore create mode 100644 gix-commitgraph/fuzz/Cargo.toml create mode 100644 gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs diff --git a/gix-commitgraph/fuzz/.gitignore b/gix-commitgraph/fuzz/.gitignore new file mode 100644 index 00000000000..1a45eee7760 --- /dev/null +++ b/gix-commitgraph/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/gix-commitgraph/fuzz/Cargo.toml b/gix-commitgraph/fuzz/Cargo.toml new file mode 100644 index 00000000000..76fcd577e33 --- /dev/null +++ b/gix-commitgraph/fuzz/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "gix-commitgraph-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +anyhow = "1.0.76" +arbitrary = { version = "1.3.2", features = ["derive"] } +libfuzzer-sys = "0.4" +tempfile = "3.8.1" + +[dependencies.gix-commitgraph] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[profile.release] +debug = 1 + +[[bin]] +name = "fuzz_file" +path = "fuzz_targets/fuzz_file.rs" +test = false +doc = false diff --git a/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs b/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs new file mode 100644 index 00000000000..a2011233e0d --- /dev/null +++ b/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs @@ -0,0 +1,29 @@ +#![no_main] + +use anyhow::Result; +use arbitrary::Arbitrary; +use gix_commitgraph::File; +use libfuzzer_sys::fuzz_target; +use std::fs; +use std::hint::black_box; +use tempfile::NamedTempFile; + +fn fuzz(data: &[u8]) -> Result<()> { + let named_temp_file = NamedTempFile::new()?; + fs::write(named_temp_file.path(), data).expect("Unable to write fuzzed file"); + let file = File::try_from(named_temp_file.path())?; + + _ = black_box(file.iter_base_graph_ids().count()); + _ = black_box(file.iter_commits().count()); + _ = black_box(file.iter_ids().count()); + + let _ = black_box(file.checksum()); + let _ = black_box(file.verify_checksum()); + let _ = black_box(file.object_hash()); + + Ok(()) +} + +fuzz_target!(|data: &[u8]| { + _ = black_box(fuzz(data)); +}); From 8613c41001b92332d273e9343320059d4c7e7a0d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 24 Dec 2023 09:37:29 +0100 Subject: [PATCH 2/3] feat: `TryFrom for File` to allow more arbitrary inputs. This is particularly useful for fuzzing, but can certainly be interesting for other low-level applications as well. --- gix-commitgraph/src/file/init.rs | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/gix-commitgraph/src/file/init.rs b/gix-commitgraph/src/file/init.rs index 1d8f52e3f7b..df9ef369cda 100644 --- a/gix-commitgraph/src/file/init.rs +++ b/gix-commitgraph/src/file/init.rs @@ -1,3 +1,4 @@ +use std::path::PathBuf; use std::{ convert::{TryFrom, TryInto}, path::Path, @@ -62,24 +63,13 @@ impl File { pub fn at(path: impl AsRef) -> Result { Self::try_from(path.as_ref()) } -} -impl TryFrom<&Path> for File { - type Error = Error; - - fn try_from(path: &Path) -> Result { - let data = std::fs::File::open(path) - .and_then(|file| { - // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. - #[allow(unsafe_code)] - unsafe { - Mmap::map(&file) - } - }) - .map_err(|e| Error::Io { - err: e, - path: path.to_owned(), - })?; + /// A lower-level constructor which constructs a new instance directly from the mapping in `data`, + /// assuming that it originated from `path`. + /// + /// Note that `path` is only used for verification of the hash its basename contains, but otherwise + /// is not of importance. + pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result { let data_size = data.len(); if data_size < MIN_FILE_SIZE { return Err(Error::Corrupt( @@ -240,13 +230,33 @@ impl TryFrom<&Path> for File { extra_edges_list_range, fan, oid_lookup_offset, - path: path.to_owned(), + path, hash_len: object_hash.len_in_bytes(), object_hash, }) } } +impl TryFrom<&Path> for File { + type Error = Error; + + fn try_from(path: &Path) -> Result { + let data = std::fs::File::open(path) + .and_then(|file| { + // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. + #[allow(unsafe_code)] + unsafe { + Mmap::map(&file) + } + }) + .map_err(|e| Error::Io { + err: e, + path: path.to_owned(), + })?; + Self::new(data, path.to_owned()) + } +} + // Copied from gix-odb/pack/index/init.rs fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { let mut fan = [0; FAN_LEN]; From 672294ec00edda6af32bb093f44a6d2d9dc4ab59 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 24 Dec 2023 09:50:10 +0100 Subject: [PATCH 3/3] Let commitgraph fuzzer use the latest API This increases performance by 4x while reducing kernel load to not being present at all, down from 90%/nearly 1 CPU. --- gix-commitgraph/fuzz/Cargo.toml | 2 +- gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gix-commitgraph/fuzz/Cargo.toml b/gix-commitgraph/fuzz/Cargo.toml index 76fcd577e33..b15f90419ea 100644 --- a/gix-commitgraph/fuzz/Cargo.toml +++ b/gix-commitgraph/fuzz/Cargo.toml @@ -11,7 +11,7 @@ cargo-fuzz = true anyhow = "1.0.76" arbitrary = { version = "1.3.2", features = ["derive"] } libfuzzer-sys = "0.4" -tempfile = "3.8.1" +memmap2 = "0.9.0" [dependencies.gix-commitgraph] path = ".." diff --git a/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs b/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs index a2011233e0d..f6894f33bd0 100644 --- a/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs +++ b/gix-commitgraph/fuzz/fuzz_targets/fuzz_file.rs @@ -1,17 +1,17 @@ #![no_main] use anyhow::Result; -use arbitrary::Arbitrary; use gix_commitgraph::File; use libfuzzer_sys::fuzz_target; -use std::fs; use std::hint::black_box; -use tempfile::NamedTempFile; fn fuzz(data: &[u8]) -> Result<()> { - let named_temp_file = NamedTempFile::new()?; - fs::write(named_temp_file.path(), data).expect("Unable to write fuzzed file"); - let file = File::try_from(named_temp_file.path())?; + let data = { + let mut d = memmap2::MmapMut::map_anon(data.len())?; + d.copy_from_slice(data); + d.make_read_only()? + }; + let file = File::new(data, "does not matter".into())?; _ = black_box(file.iter_base_graph_ids().count()); _ = black_box(file.iter_commits().count());