Skip to content

Commit

Permalink
feat: Add support for statistics and additional performance information.
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Dec 24, 2024
1 parent a158d22 commit 4ffe6eb
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 38 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion gitoxide-core/src/repository/blame.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
use gix::bstr::BStr;
use std::ffi::OsStr;

pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> {
/// Blame `file` as of the commit that `HEAD` peels to, writing the blame entries to `out`.
///
/// If `err` is `Some`, the statistics gathered while blaming are written to it afterwards,
/// pretty-printed in their `Debug` form. With `None`, no statistics are printed.
///
/// # Errors
///
/// Any failure while accessing the repository, setting up the commit traversal, computing the
/// blame, or writing to `out` or `err` is propagated to the caller.
pub fn blame_file(
    mut repo: gix::Repository,
    file: &OsStr,
    out: impl std::io::Write,
    err: Option<&mut dyn std::io::Write>,
) -> anyhow::Result<()> {
    // Derive the object cache size from the index (or an empty one) before any objects are read.
    let cache_size = repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?);
    repo.object_cache_size_if_unset(cache_size);

    // The traversal starts at the commit that `HEAD` currently resolves to.
    let head_commit = repo.head()?.peel_to_commit_in_place()?;
    let commit_walk = gix::traverse::commit::topo::Builder::from_iters(
        &repo.objects,
        [head_commit.id],
        None::<Vec<gix::ObjectId>>,
    )
    .with_commit_graph(repo.commit_graph_if_enabled()?)
    .build()?;

    let mut diff_cache = repo.diff_resource_cache_for_tree_diff()?;
    let path: &BStr = gix::path::os_str_into_bstr(file)?;

    let blamed = gix::blame::file(&repo.objects, commit_walk, &mut diff_cache, path)?;
    // Take the statistics out first: `blamed` is moved into `write_blame_entries` below.
    let statistics = blamed.statistics;
    write_blame_entries(out, blamed)?;

    // Statistics are opt-in; without a sink there is nothing left to do.
    let Some(err) = err else {
        return Ok(());
    };
    writeln!(err, "{statistics:#?}")?;
    Ok(())
}

Expand Down
1 change: 1 addition & 0 deletions gix-blame/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ rust-version = "1.65"
doctest = false

[dependencies]
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] }
gix-object = { version = "^0.46.0", path = "../gix-object" }
gix-hash = { version = "^0.15.0", path = "../gix-hash" }
Expand Down
99 changes: 65 additions & 34 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::{process_changes, Change, Offset, UnblamedHunk};
use crate::{BlameEntry, Outcome};
use crate::{BlameEntry, Outcome, Statistics};
use gix_diff::blob::intern::TokenSource;
use gix_hash::ObjectId;
use gix_object::{bstr::BStr, FindExt};
Expand Down Expand Up @@ -62,9 +62,12 @@ pub fn file<E>(
let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else {
todo!("return actual error");
};
let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);

let (mut buf, mut buf2) = (Vec::new(), Vec::new());
let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2).unwrap();
let mut stats = Statistics::default();
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
let original_file_entry =
find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats).unwrap();
let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec();
let num_lines_in_original = {
let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100);
Expand All @@ -81,9 +84,11 @@ pub fn file<E>(
)];

let mut out = Vec::new();
let mut diff_state = gix_diff::tree::State::default();
'outer: for item in traverse {
let item = item?;
let suspect = item.id;
stats.commits_traversed += 1;

let mut parent_ids = item.parent_ids;
if parent_ids.is_empty() {
Expand All @@ -102,13 +107,15 @@ pub fn file<E>(
break;
}

let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2) else {
let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats) else {
continue;
};

if parent_ids.len() == 1 {
let parent_id = parent_ids.pop().expect("just validated there is exactly one");
if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) {
if let Some(parent_entry) =
find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats)
{
if entry.oid == parent_entry.oid {
// The blobs storing the blamed file in `entry` and `parent_entry` are identical
// which is why we can pass blame to the parent without further checks.
Expand All @@ -119,7 +126,17 @@ pub fn file<E>(
}
}

let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else {
let Some(modification) = tree_diff_at_file_path(
&odb,
file_path,
item.id,
parent_id,
&mut stats,
&mut diff_state,
&mut buf,
&mut buf2,
&mut buf3,
) else {
// None of the changes affected the file we’re currently blaming. Pass blame to parent.
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.pass_blame(suspect, parent_id);
Expand All @@ -142,8 +159,7 @@ pub fn file<E>(
}
gix_diff::tree::recorder::Change::Deletion { .. } => todo!(),
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path);

let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats);
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.pass_blame(suspect, parent_id);
Expand All @@ -152,7 +168,8 @@ pub fn file<E>(
}
} else {
for parent_id in &parent_ids {
if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2)
if let Some(parent_entry) =
find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)
{
if entry.oid == parent_entry.oid {
// The blobs storing the blamed file in `entry` and `parent_entry` are
Expand All @@ -167,7 +184,17 @@ pub fn file<E>(
}

for parent_id in parent_ids {
let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id);
let changes_for_file_path = tree_diff_at_file_path(
&odb,
file_path,
item.id,
parent_id,
&mut stats,
&mut diff_state,
&mut buf,
&mut buf2,
&mut buf3,
);
let Some(modification) = changes_for_file_path else {
// None of the changes affected the file we’re currently blaming. Pass blame
// to parent.
Expand All @@ -188,8 +215,7 @@ pub fn file<E>(
}
gix_diff::tree::recorder::Change::Deletion { .. } => todo!(),
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path);

let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats);
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.pass_blame(suspect, parent_id);
Expand All @@ -215,6 +241,7 @@ pub fn file<E>(
Ok(Outcome {
entries: coalesce_blame_entries(out),
blob: original_file_blob,
statistics: stats,
})
}

Expand Down Expand Up @@ -262,42 +289,37 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
})
}

#[allow(clippy::too_many_arguments)]
fn tree_diff_at_file_path(
odb: impl gix_object::Find + gix_object::FindHeader,
file_path: &BStr,
id: ObjectId,
parent_id: ObjectId,
stats: &mut Statistics,
state: &mut gix_diff::tree::State,
commit_buf: &mut Vec<u8>,
lhs_tree_buf: &mut Vec<u8>,
rhs_tree_buf: &mut Vec<u8>,
) -> Option<gix_diff::tree::recorder::Change> {
let mut buffer = Vec::new();
let parent_tree = odb.find_commit(&parent_id, commit_buf).unwrap().tree();
stats.commits_to_tree += 1;

let parent = odb.find_commit(&parent_id, &mut buffer).unwrap();

let mut buffer = Vec::new();
let parent_tree_iter = odb
.find(&parent.tree(), &mut buffer)
.find(&parent_tree, lhs_tree_buf)
.unwrap()
.try_into_tree_iter()
.unwrap();
stats.trees_decoded += 1;

let mut buffer = Vec::new();
let commit = odb.find_commit(&id, &mut buffer).unwrap();
let tree_id = odb.find_commit(&id, commit_buf).unwrap().tree();
stats.commits_to_tree += 1;

let mut buffer = Vec::new();
let tree_iter = odb
.find(&commit.tree(), &mut buffer)
.unwrap()
.try_into_tree_iter()
.unwrap();
let tree_iter = odb.find(&tree_id, rhs_tree_buf).unwrap().try_into_tree_iter().unwrap();
stats.trees_decoded += 1;

let mut recorder = gix_diff::tree::Recorder::default();
gix_diff::tree(
parent_tree_iter,
tree_iter,
gix_diff::tree::State::default(),
&odb,
&mut recorder,
)
.unwrap();
gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder).unwrap();
stats.trees_diffed += 1;

recorder.records.into_iter().find(|change| match change {
gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path,
Expand All @@ -312,6 +334,7 @@ fn blob_changes(
oid: ObjectId,
previous_oid: ObjectId,
file_path: &BStr,
stats: &mut Statistics,
) -> Vec<Change> {
/// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*.
struct ChangeRecorder {
Expand Down Expand Up @@ -391,6 +414,7 @@ fn blob_changes(
let number_of_lines_in_destination = input.after.len();
let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap());

stats.blobs_diffed += 1;
gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder)
}

Expand All @@ -400,12 +424,19 @@ fn find_path_entry_in_commit(
file_path: &BStr,
buf: &mut Vec<u8>,
buf2: &mut Vec<u8>,
stats: &mut Statistics,
) -> Option<gix_object::tree::Entry> {
let commit_id = odb.find_commit(commit, buf).unwrap().tree();
let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap();
stats.commits_to_tree += 1;
stats.trees_decoded += 1;

tree_iter
.lookup_entry(odb, buf2, file_path.split(|b| *b == b'/'))
.lookup_entry(
odb,
buf2,
file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
)
.unwrap()
}

Expand Down
2 changes: 1 addition & 1 deletion gix-blame/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#![forbid(unsafe_code)]

mod types;
pub use types::{BlameEntry, Outcome};
pub use types::{BlameEntry, Outcome, Statistics};

mod file;
pub use file::function::file;
19 changes: 19 additions & 0 deletions gix-blame/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,31 @@ use gix_hash::ObjectId;
use gix_object::bstr::BString;

/// The result produced by [`file()`](crate::file()).
#[derive(Clone, Debug)]
pub struct Outcome {
    /// The blame entries in sequential order, each associating a hunk of the original file with
    /// the commit (and its lines) that introduced it.
    pub entries: Vec<BlameEntry>,
    /// The content of the *Original File* as a byte buffer, ready for tokenization.
    pub blob: Vec<u8>,
    /// Counters describing the amount of work that was performed to produce the blame.
    pub statistics: Statistics,
}

/// Counters describing the operations performed while producing a blame.
///
/// Every counter is zero in the [`Default`] value.
#[derive(Clone, Copy, Debug, Default)]
pub struct Statistics {
    /// The number of commits that were traversed until the blame was complete.
    pub commits_traversed: usize,
    /// The number of commits whose trees were extracted.
    pub commits_to_tree: usize,
    /// The number of trees that were decoded to find the entry of the file to blame.
    pub trees_decoded: usize,
    /// The number of fully-fledged tree-diffs run to see if the filepath was added, deleted or
    /// modified.
    pub trees_diffed: usize,
    /// The number of blobs that were compared to each other to learn what changed between commits.
    /// Note that diffing a blob requires loading both of its versions from the database.
    pub blobs_diffed: usize,
}

impl Outcome {
Expand Down
6 changes: 4 additions & 2 deletions src/plumbing/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1533,14 +1533,16 @@ pub fn main() -> Result<()> {
},
),
},
Subcommands::Blame { file } => prepare_and_run(
Subcommands::Blame { statistics, file } => prepare_and_run(
"blame",
trace,
verbose,
progress,
progress_keep_open,
None,
move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out),
move |_progress, out, err| {
core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err))
},
),
Subcommands::Completions { shell, out_dir } => {
let mut app = Args::command();
Expand Down
4 changes: 4 additions & 0 deletions src/plumbing/options/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ pub enum Subcommands {
Free(free::Subcommands),
/// Blame lines in a file
Blame {
/// Print additional statistics to help understanding performance.
#[clap(long, short = 's')]
statistics: bool,
/// The file to create the blame information for.
file: std::ffi::OsString,
},
/// Generate shell completions to stdout or a directory.
Expand Down

0 comments on commit 4ffe6eb

Please sign in to comment.