diff --git a/Cargo.lock b/Cargo.lock index ca292869281..2df79331bb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1548,6 +1548,7 @@ dependencies = [ "gix-odb", "gix-ref 0.49.1", "gix-testtools", + "gix-trace 0.1.11", "gix-traverse 0.43.1", "gix-worktree 0.38.0", ] diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index bfb86661be2..d33d34f3c4a 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -1,19 +1,29 @@ use gix::bstr::BStr; use std::ffi::OsStr; -pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> { +pub fn blame_file( + mut repo: gix::Repository, + file: &OsStr, + out: impl std::io::Write, + err: Option<&mut dyn std::io::Write>, +) -> anyhow::Result<()> { repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); let suspect = repo.head()?.peel_to_commit_in_place()?; let traverse = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) + .with_commit_graph(repo.commit_graph_if_enabled()?) 
.build()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; let file_path: &BStr = gix::path::os_str_into_bstr(file)?; let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?; + let statistics = outcome.statistics; write_blame_entries(out, outcome)?; + if let Some(err) = err { + writeln!(err, "{statistics:#?}")?; + } Ok(()) } diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index ed0018bc9ee..039f4788bfd 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,6 +14,7 @@ rust-version = "1.65" doctest = false [dependencies] +gix-trace = { version = "^0.1.11", path = "../gix-trace" } gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.46.0", path = "../gix-object" } gix-hash = { version = "^0.15.0", path = "../gix-hash" } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 4ec6d24780f..b869b81a726 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,5 +1,5 @@ use super::{process_changes, Change, Offset, UnblamedHunk}; -use crate::{BlameEntry, Outcome}; +use crate::{BlameEntry, Outcome, Statistics}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; @@ -62,9 +62,12 @@ pub fn file( let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { todo!("return actual error"); }; + let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); - let (mut buf, mut buf2) = (Vec::new(), Vec::new()); - let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2).unwrap(); + let mut stats = Statistics::default(); + let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); + let original_file_entry = + find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats).unwrap(); let original_file_blob = 
odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec(); let num_lines_in_original = { let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); @@ -81,9 +84,11 @@ pub fn file( )]; let mut out = Vec::new(); + let mut diff_state = gix_diff::tree::State::default(); 'outer: for item in traverse { let item = item?; let suspect = item.id; + stats.commits_traversed += 1; let mut parent_ids = item.parent_ids; if parent_ids.is_empty() { @@ -102,13 +107,15 @@ pub fn file( break; } - let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2) else { + let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats) else { continue; }; if parent_ids.len() == 1 { let parent_id = parent_ids.pop().expect("just validated there is exactly one"); - if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) { + if let Some(parent_entry) = + find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats) + { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are identical // which is why we can pass blame to the parent without further checks. @@ -119,7 +126,17 @@ pub fn file( } } - let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else { + let Some(modification) = tree_diff_at_file_path( + &odb, + file_path, + item.id, + parent_id, + &mut stats, + &mut diff_state, + &mut buf, + &mut buf2, + &mut buf3, + ) else { // None of the changes affected the file we’re currently blaming. Pass blame to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -142,8 +159,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -152,7 +168,8 @@ pub fn file( } } else { for parent_id in &parent_ids { - if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) + if let Some(parent_entry) = + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats) { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are @@ -167,7 +184,17 @@ pub fn file( } for parent_id in parent_ids { - let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id); + let changes_for_file_path = tree_diff_at_file_path( + &odb, + file_path, + item.id, + parent_id, + &mut stats, + &mut diff_state, + &mut buf, + &mut buf2, + &mut buf3, + ); let Some(modification) = changes_for_file_path else { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. @@ -188,8 +215,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -215,6 +241,7 @@ pub fn file( Ok(Outcome { entries: coalesce_blame_entries(out), blob: original_file_blob, + statistics: stats, }) } @@ -262,42 +289,37 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { }) } +#[allow(clippy::too_many_arguments)] fn tree_diff_at_file_path( odb: impl gix_object::Find + gix_object::FindHeader, file_path: &BStr, id: ObjectId, parent_id: ObjectId, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + commit_buf: &mut Vec, + lhs_tree_buf: &mut Vec, + rhs_tree_buf: &mut Vec, ) -> Option { - let mut buffer = Vec::new(); + let parent_tree = odb.find_commit(&parent_id, commit_buf).unwrap().tree(); + stats.commits_to_tree += 1; - let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); - - let mut buffer = Vec::new(); let parent_tree_iter = odb - .find(&parent.tree(), &mut buffer) + .find(&parent_tree, lhs_tree_buf) .unwrap() .try_into_tree_iter() .unwrap(); + stats.trees_decoded += 1; - let mut buffer = Vec::new(); - let commit = odb.find_commit(&id, &mut buffer).unwrap(); + let tree_id = odb.find_commit(&id, commit_buf).unwrap().tree(); + stats.commits_to_tree += 1; - let mut buffer = Vec::new(); - let tree_iter = odb - .find(&commit.tree(), &mut buffer) - .unwrap() - .try_into_tree_iter() - .unwrap(); + let tree_iter = odb.find(&tree_id, rhs_tree_buf).unwrap().try_into_tree_iter().unwrap(); + stats.trees_decoded += 1; let mut recorder = gix_diff::tree::Recorder::default(); - gix_diff::tree( - parent_tree_iter, - tree_iter, - gix_diff::tree::State::default(), - &odb, - &mut recorder, - ) - .unwrap(); + gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut 
recorder).unwrap(); + stats.trees_diffed += 1; recorder.records.into_iter().find(|change| match change { gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, @@ -312,6 +334,7 @@ fn blob_changes( oid: ObjectId, previous_oid: ObjectId, file_path: &BStr, + stats: &mut Statistics, ) -> Vec { /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. struct ChangeRecorder { @@ -391,6 +414,7 @@ fn blob_changes( let number_of_lines_in_destination = input.after.len(); let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + stats.blobs_diffed += 1; gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) } @@ -400,12 +424,19 @@ fn find_path_entry_in_commit( file_path: &BStr, buf: &mut Vec, buf2: &mut Vec, + stats: &mut Statistics, ) -> Option { let commit_id = odb.find_commit(commit, buf).unwrap().tree(); let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap(); + stats.commits_to_tree += 1; + stats.trees_decoded += 1; tree_iter - .lookup_entry(odb, buf2, file_path.split(|b| *b == b'/')) + .lookup_entry( + odb, + buf2, + file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), + ) .unwrap() } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 7a7ebbcc24b..6ea0a3c61e5 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -15,7 +15,7 @@ #![forbid(unsafe_code)] mod types; -pub use types::{BlameEntry, Outcome}; +pub use types::{BlameEntry, Outcome, Statistics}; mod file; pub use file::function::file; diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 3def1bddf9b..6dea55be399 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -8,12 +8,31 @@ use gix_hash::ObjectId; use gix_object::bstr::BString; /// The outcome of [`file()`](crate::file()). 
+#[derive(Debug, Clone)] pub struct Outcome { /// One entry in sequential order, to associate a hunk in the original file with the commit (and its lines) /// that introduced it. pub entries: Vec, /// A buffer with the file content of the *Original File*, ready for tokenization. pub blob: Vec, + /// Additional information about the amount of work performed to produce the blame. + pub statistics: Statistics, +} + +/// Additional information about the performed operations. +#[derive(Debug, Default, Copy, Clone)] +pub struct Statistics { + /// The amount of commits it traversed until the blame was complete. + pub commits_traversed: usize, + /// The amount of commits whose trees were extracted. + pub commits_to_tree: usize, + /// The amount of trees that were decoded to find the entry of the file to blame. + pub trees_decoded: usize, + /// The amount of fully-fledged tree-diffs to see if the filepath was added, deleted or modified. + pub trees_diffed: usize, + /// The amount of blobs that were compared to each other to learn what changed between commits. + /// Note that in order to diff a blob, one needs to load both versions from the database. 
+ pub blobs_diffed: usize, } impl Outcome { diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 20c0db777cc..625f9733268 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1533,14 +1533,16 @@ pub fn main() -> Result<()> { }, ), }, - Subcommands::Blame { file } => prepare_and_run( + Subcommands::Blame { statistics, file } => prepare_and_run( "blame", trace, verbose, progress, progress_keep_open, None, - move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out), + move |_progress, out, err| { + core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err)) + }, ), Subcommands::Completions { shell, out_dir } => { let mut app = Args::command(); diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 99d66a39861..a1f37b08e13 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -153,6 +153,10 @@ pub enum Subcommands { Free(free::Subcommands), /// Blame lines in a file Blame { + /// Print additional statistics to help understand performance. + #[clap(long, short = 's')] + statistics: bool, + /// The file to create the blame information for. file: std::ffi::OsString, }, /// Generate shell completions to stdout or a directory.