From 47c8a1fb37d52fffb180a9c3a0c377e34b1b8d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Fri, 31 Jan 2025 20:45:49 +0100 Subject: [PATCH 1/4] feat: expose `commit::find` and `commit::Either` For a way to obtain a commit either by ODB lookup, or by retrieving it from the commit-graph. This is useful for the implementation of custom traversals. --- gix-traverse/src/commit/mod.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gix-traverse/src/commit/mod.rs b/gix-traverse/src/commit/mod.rs index 26e9287ae70..ccfc1ee5644 100644 --- a/gix-traverse/src/commit/mod.rs +++ b/gix-traverse/src/commit/mod.rs @@ -67,12 +67,18 @@ pub struct Info { pub commit_time: Option, } -enum Either<'buf, 'cache> { +/// Information about a commit that can be obtained either from a [`gix_object::CommitRefIter`] or +/// a [`gix_commitgraph::file::Commit`]. +pub enum Either<'buf, 'cache> { + /// See [`gix_object::CommitRefIter`]. CommitRefIter(gix_object::CommitRefIter<'buf>), + /// See [`gix_commitgraph::file::Commit`]. CachedCommit(gix_commitgraph::file::Commit<'cache>), } -fn find<'cache, 'buf, Find>( +/// Find information about a commit by either getting it from a [`gix_commitgraph::Graph`], if +/// present, or a [`gix_object::CommitRefIter`] otherwise. +pub fn find<'cache, 'buf, Find>( cache: Option<&'cache gix_commitgraph::Graph>, objects: Find, id: &gix_hash::oid, From 1250df3f9c10f66e4b8e227809831f3088482960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Fri, 31 Jan 2025 20:46:32 +0100 Subject: [PATCH 2/4] feat!: skip uninteresting commits for blame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is breaking because it takes a commitgraph cache as argument , and because it replaces the `traverse` by `suspect`. Switch to date order for traversing the commit history, as opposed to topo order. This is also what `git blame` does. Skip suspects that have no associated unblamed hunks Pass blame to parent in `process_change`. `git`’s algorithm only seems to keep the current suspect for unblamed hunks that were the direct result of splitting an existing unblamed hunk because it matched with a change. All other hunks appear to be blamed on the parent without further checks. Add assertion that lines always match. --- Cargo.lock | 3 + gix-blame/Cargo.toml | 3 + gix-blame/src/file/function.rs | 185 +++++++++++++++++++++++++++------ gix-blame/src/file/mod.rs | 32 +++--- gix-blame/src/file/tests.rs | 60 +++++------ gix-blame/tests/blame.rs | 33 +++--- 6 files changed, 226 insertions(+), 90 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d022dc163a6..0192c56a0b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1539,6 +1539,7 @@ dependencies = [ name = "gix-blame" version = "0.0.0" dependencies = [ + "gix-commitgraph 0.26.0", "gix-diff", "gix-filter", "gix-fs 0.13.0", @@ -1547,10 +1548,12 @@ dependencies = [ "gix-object 0.47.0", "gix-odb", "gix-ref 0.50.0", + "gix-revwalk 0.18.0", "gix-testtools", "gix-trace 0.1.12", "gix-traverse 0.44.0", "gix-worktree 0.39.0", + "smallvec", "thiserror 2.0.3", ] diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 0d8a0caa86e..f934c521da0 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,6 +14,8 @@ rust-version = "1.70" doctest = false [dependencies] +gix-commitgraph = { version = "^0.26.0", path = "../gix-commitgraph" } +gix-revwalk = { version = "^0.18.0", path = "../gix-revwalk" } gix-trace = { version = "^0.1.12", path = "../gix-trace" } gix-diff = { version = "^0.50.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.47.0", path = "../gix-object" } @@ -21,6 +23,7 @@ gix-hash = { version = "^0.16.0", path = "../gix-hash" } gix-worktree = { version = "^0.39.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } gix-traverse = { version = "^0.44.0", path = "../gix-traverse" } +smallvec = "1.10.0" thiserror = "2.0.0" [dev-dependencies] diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 80a14e7ec4a..3cd2fab6deb 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -7,22 +7,23 @@ use gix_object::{ bstr::{BStr, BString}, FindExt, }; +use gix_traverse::commit::find; +use smallvec::SmallVec; use std::num::NonZeroU32; use std::ops::Range; /// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file -/// at `traverse[0]:` originated in. +/// at `suspect:` originated in. /// /// ## Paramters /// /// * `odb` /// - Access to database objects, also for used for diffing. /// - Should have an object cache for good diff performance. -/// * `traverse` -/// - The list of commits from the most recent to prior ones, following all parents sorted -/// by time. -/// - It's paramount that older commits are returned after newer ones. -/// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`. +/// * `suspect` +/// - The first commit to be responsible for parts of `file_path`. +/// * `cache` +/// - Optionally, the commitgraph cache. /// * `file_path` /// - A *slash-separated* worktree-relative path to the file to blame. /// * `range` @@ -60,20 +61,14 @@ use std::ops::Range; // <---><----------><-------><-----><-------> // <---><---><-----><-------><-----><-------> // <---><---><-----><-------><-----><-><-><-> -pub fn file( +pub fn file( odb: impl gix_object::Find + gix_object::FindHeader, - traverse: impl IntoIterator>, + suspect: ObjectId, + cache: Option, resource_cache: &mut gix_diff::blob::Platform, file_path: &BStr, range: Option>, -) -> Result -where - E: Into>, -{ - let mut traverse = traverse.into_iter().peekable(); - let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { - return Err(Error::EmptyTraversal); - }; +) -> Result { let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); let mut stats = Statistics::default(); @@ -103,25 +98,43 @@ where suspects: [(suspect, range_in_blamed_file)].into(), }]; + let mut buf = Vec::new(); + let commit = find(cache.as_ref(), &odb, &suspect, &mut buf)?; + + let mut queue: gix_revwalk::PriorityQueue = gix_revwalk::PriorityQueue::new(); + + let commit_time = commit_time(commit); + queue.insert(commit_time, suspect); + let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); let mut previous_entry: Option<(ObjectId, ObjectId)> = None; - 'outer: while let Some(item) = traverse.next() { + 'outer: while let Some(suspect) = queue.pop_value() { if hunks_to_blame.is_empty() { break; } - let commit = item.map_err(|err| Error::Traverse(err.into()))?; - let suspect = commit.id; + + let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.suspects.contains_key(&suspect)); + + if !is_still_suspect { + // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with + // the next one. + continue 'outer; + } + stats.commits_traversed += 1; - let parent_ids = commit.parent_ids; + let commit = find(cache.as_ref(), &odb, &suspect, &mut buf)?; + + let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref()); + if parent_ids.is_empty() { - if traverse.peek().is_none() { - // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of - // the last `item` that was yielded by `traverse`, so it makes sense to assign the - // remaining lines to it, even though we don’t explicitly check whether that is true - // here. We could perhaps use diff-tree-to-tree to compare `suspect` - // against an empty tree to validate this assumption. + if queue.is_empty() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the + // `id` of the last `item` that was yielded by `queue`, so it makes sense to assign + // the remaining lines to it, even though we don’t explicitly check whether that is + // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against + // an empty tree to validate this assumption. if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { break 'outer; } @@ -143,7 +156,41 @@ where continue; }; - for (pid, parent_id) in parent_ids.iter().enumerate() { + // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are + // identical to the corresponding lines in the *Source File*. + #[cfg(debug_assertions)] + { + let source_blob = odb.find_blob(&entry_id, &mut buf)?.data.to_vec(); + let mut source_interner = gix_diff::blob::intern::Interner::new(source_blob.len() / 100); + let source_lines_as_tokens: Vec<_> = tokens_for_diffing(&source_blob) + .tokenize() + .map(|token| source_interner.intern(token)) + .collect(); + + let mut blamed_interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100); + let blamed_lines_as_tokens: Vec<_> = tokens_for_diffing(&blamed_file_blob) + .tokenize() + .map(|token| blamed_interner.intern(token)) + .collect(); + + for hunk in hunks_to_blame.iter() { + if let Some(range_in_suspect) = hunk.suspects.get(&suspect) { + let range_in_blamed_file = hunk.range_in_blamed_file.clone(); + + for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) { + let source_token = source_lines_as_tokens[source_line_number as usize]; + let blame_token = blamed_lines_as_tokens[blamed_line_number as usize]; + + let source_line = BString::new(source_interner[source_token].into()); + let blamed_line = BString::new(blamed_interner[blame_token].into()); + + assert_eq!(source_line, blamed_line); + } + } + } + } + + for (pid, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() { if let Some(parent_entry_id) = find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? { @@ -153,17 +200,19 @@ where } if no_change_in_entry { pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame); + queue.insert(*parent_commit_time, *parent_id); continue 'outer; } } } let more_than_one_parent = parent_ids.len() > 1; - for parent_id in parent_ids { + for (parent_id, parent_commit_time) in parent_ids { + queue.insert(parent_commit_time, parent_id); let changes_for_file_path = tree_diff_at_file_path( &odb, file_path, - commit.id, + suspect, parent_id, &mut stats, &mut diff_state, @@ -588,8 +637,82 @@ fn find_path_entry_in_commit( Ok(res.map(|e| e.oid)) } -/// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them -/// so the later access shows the right thing. +type CommitTime = i64; + +fn commit_time(commit: gix_traverse::commit::Either<'_, '_>) -> CommitTime { + match commit { + gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => { + let mut commit_time = 0; + for token in commit_ref_iter { + use gix_object::commit::ref_iter::Token as T; + match token { + Ok(T::Tree { .. }) => continue, + Ok(T::Parent { .. }) => continue, + Ok(T::Author { .. }) => continue, + Ok(T::Committer { signature }) => { + commit_time = signature.time.seconds; + break; + } + Ok(_unused_token) => break, + Err(_err) => todo!(), + } + } + commit_time + } + gix_traverse::commit::Either::CachedCommit(commit) => commit.committer_timestamp() as i64, + } +} + +type ParentIds = SmallVec<[(gix_hash::ObjectId, i64); 2]>; + +fn collect_parents( + commit: gix_traverse::commit::Either<'_, '_>, + odb: &impl gix_object::Find, + cache: Option<&gix_commitgraph::Graph>, +) -> ParentIds { + let mut parent_ids: ParentIds = Default::default(); + + match commit { + gix_traverse::commit::Either::CachedCommit(commit) => { + let cache = cache + .as_ref() + .expect("find returned a cached commit, so we expect cache to be present"); + for parent_id in commit.iter_parents() { + match parent_id { + Ok(pos) => { + let parent = cache.commit_at(pos); + + parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64)); + } + Err(_) => todo!(), + } + } + } + gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => { + for token in commit_ref_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + let mut buf = Vec::new(); + let parent = odb.find_commit_iter(id.as_ref(), &mut buf).ok(); + let parent_commit_time = parent + .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds)) + .unwrap_or_default(); + + parent_ids.push((id, parent_commit_time)); + } + Ok(_unused_token) => break, + Err(_err) => todo!(), + } + } + } + }; + + parent_ids +} + +/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important +/// to unify them so the later access shows the right thing. pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { gix_diff::blob::sources::byte_lines_with_terminator(data) } diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 086923bac45..668be563533 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -66,7 +66,7 @@ fn process_change( // Nothing to do with `hunk` except shifting it, // but `unchanged` needs to be checked against the next hunk to catch up. - new_hunks_to_blame.push(hunk.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(hunk.passed_blame(suspect, parent).shift_by(parent, *offset)); (None, Some(Change::Unchanged(unchanged))) } (false, false) => { @@ -95,7 +95,7 @@ fn process_change( // Nothing to do with `hunk` except shifting it, // but `unchanged` needs to be checked against the next hunk to catch up. - new_hunks_to_blame.push(hunk.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(hunk.passed_blame(suspect, parent).shift_by(parent, *offset)); (None, Some(Change::Unchanged(unchanged))) } } @@ -125,7 +125,7 @@ fn process_change( } Either::Right((before, after)) => { // requeue the left side `before` after offsetting it… - new_hunks_to_blame.push(before.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(before.passed_blame(suspect, parent).shift_by(parent, *offset)); // …and treat `after` as `new_hunk`, which contains the `added` range. after } @@ -162,7 +162,7 @@ fn process_change( Either::Left(hunk) => hunk, Either::Right((before, after)) => { // Keep looking for the left side of the unblamed portion. - new_hunks_to_blame.push(before.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(before.passed_blame(suspect, parent).shift_by(parent, *offset)); after } }; @@ -220,7 +220,7 @@ fn process_change( // <----> (added) // Retry `hunk` once there is overlapping changes to process. - new_hunks_to_blame.push(hunk.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(hunk.passed_blame(suspect, parent).shift_by(parent, *offset)); // Let hunks catchup with this change. ( @@ -273,7 +273,7 @@ fn process_change( } Either::Right((before, after)) => { // `before` isn't affected by deletion, so keep it for later. - new_hunks_to_blame.push(before.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(before.passed_blame(suspect, parent).shift_by(parent, *offset)); // after will be affected by offset, and we will see if there are more changes affecting it. after } @@ -285,7 +285,7 @@ fn process_change( // | (line_number_in_destination) // Catchup with changes. - new_hunks_to_blame.push(hunk.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(hunk.passed_blame(suspect, parent).shift_by(parent, *offset)); ( None, @@ -295,7 +295,7 @@ fn process_change( } (Some(hunk), None) => { // nothing to do - changes are exhausted, re-evaluate `hunk`. - new_hunks_to_blame.push(hunk.cloned_blame(suspect, parent).shift_by(parent, *offset)); + new_hunks_to_blame.push(hunk.passed_blame(suspect, parent).shift_by(parent, *offset)); (None, None) } (None, Some(Change::Unchanged(_))) => { @@ -402,18 +402,20 @@ impl UnblamedHunk { } } - /// Transfer all ranges from the commit at `from` to the commit at `to`. - fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + /// This is like [`Self::pass_blame()`], but easier to use in places where the 'passing' is + /// done 'inline'. + fn passed_blame(mut self, from: ObjectId, to: ObjectId) -> Self { if let Some(range_in_suspect) = self.suspects.remove(&from) { self.suspects.insert(to, range_in_suspect); } + self } - /// This is like [`Self::clone_blame()`], but easier to use in places - /// where the cloning is done 'inline'. - fn cloned_blame(mut self, from: ObjectId, to: ObjectId) -> Self { - self.clone_blame(from, to); - self + /// Transfer all ranges from the commit at `from` to the commit at `to`. + fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.remove(&from) { + self.suspects.insert(to, range_in_suspect); + } } fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 12b17941e12..1366fcfea9c 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -115,7 +115,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 0..2, - suspects: [(suspect, 0..2), (parent, 0..2)].into() + suspects: [(parent, 0..2)].into() }, UnblamedHunk { range_in_blamed_file: 2..3, @@ -155,7 +155,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 10..12, - suspects: [(suspect, 10..12), (parent, 5..7)].into() + suspects: [(parent, 5..7)].into() }, UnblamedHunk { range_in_blamed_file: 12..13, @@ -196,7 +196,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(suspect, 7..9), (parent, 7..9)].into() + suspects: [(parent, 7..9)].into() }, UnblamedHunk { range_in_blamed_file: 14..15, @@ -306,7 +306,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 3..4, - suspects: [(suspect, 2..3), (parent, 0..1)].into() + suspects: [(parent, 0..1)].into() }, UnblamedHunk { range_in_blamed_file: 4..6, @@ -399,7 +399,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 71..107, - suspects: [(suspect, 70..106), (parent, 70..106)].into() + suspects: [(parent, 70..106)].into() }, UnblamedHunk { range_in_blamed_file: 107..109, @@ -434,7 +434,7 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 149..155, - suspects: [(suspect, 137..143), (parent, 137..143)].into() + suspects: [(parent, 137..143)].into() }, UnblamedHunk { range_in_blamed_file: 155..156, @@ -468,7 +468,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 3..6, - suspects: [(suspect, 2..5), (parent, 5..8)].into() + suspects: [(parent, 5..8)].into() }] ); assert_eq!(offset_in_destination, Offset::Deleted(3)); @@ -584,7 +584,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 15..16, - suspects: [(suspect, 17..18), (parent, 16..17)].into() + suspects: [(parent, 16..17)].into() }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -677,7 +677,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(suspect, 13..15), (parent, 10..12)].into() + suspects: [(parent, 10..12)].into() }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -706,7 +706,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 110..114, - suspects: [(suspect, 109..113), (parent, 106..110)].into() + suspects: [(parent, 106..110)].into() }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -762,7 +762,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5), (parent, 0..5)].into() + suspects: [(parent, 0..5)].into() }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -821,7 +821,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5), (parent, 0..5)].into() + suspects: [(parent, 0..5)].into() }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -883,7 +883,7 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 2..14, - suspects: [(suspect, 2..14), (parent, 2..14)].into() + suspects: [(parent, 2..14)].into() }] ); assert_eq!(offset_in_destination, Offset::Deleted(4)); @@ -1003,7 +1003,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 4..6), (parent, 0..2)].into(), + suspects: [(parent, 0..2)].into(), }, ] ); @@ -1026,7 +1026,7 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..2, - suspects: [(suspect, 0..2), (parent, 0..2)].into(), + suspects: [(parent, 0..2)].into(), }, UnblamedHunk { range_in_blamed_file: 2..4, @@ -1034,7 +1034,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 4..6), (parent, 2..4)].into(), + suspects: [(parent, 2..4)].into(), }, ] ); @@ -1065,7 +1065,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 4..6), (parent, 0..2)].into(), + suspects: [(parent, 0..2)].into(), } ] ); @@ -1088,7 +1088,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 1..6, - suspects: [(suspect, 1..6), (parent, 0..5)].into(), + suspects: [(parent, 0..5)].into(), } ] ); @@ -1111,7 +1111,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 3..6, - suspects: [(suspect, 1..4), (parent, 0..3)].into(), + suspects: [(parent, 0..3)].into(), } ] ); @@ -1134,7 +1134,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 4..6), (parent, 3..5)].into(), + suspects: [(parent, 3..5)].into(), } ] ); @@ -1152,7 +1152,7 @@ mod process_changes { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 3..5), (parent, 0..2)].into(), + suspects: [(parent, 0..2)].into(), }] ); } @@ -1174,7 +1174,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 2..3, - suspects: [(suspect, 1..2), (parent, 2..3)].into(), + suspects: [(parent, 2..3)].into(), } ] ); @@ -1201,7 +1201,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 2..3, - suspects: [(suspect, 2..3), (parent, 0..1)].into(), + suspects: [(parent, 0..1)].into(), }, UnblamedHunk { range_in_blamed_file: 3..4, @@ -1237,7 +1237,7 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..16, - suspects: [(suspect, 0..16), (parent, 0..16)].into() + suspects: [(parent, 0..16)].into() }, UnblamedHunk { range_in_blamed_file: 16..17, @@ -1245,11 +1245,11 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 17..30, - suspects: [(suspect, 17..30), (parent, 16..29)].into() + suspects: [(parent, 16..29)].into() }, UnblamedHunk { range_in_blamed_file: 31..37, - suspects: [(suspect, 31..37), (parent, 30..36)].into() + suspects: [(parent, 30..36)].into() } ] ); @@ -1285,11 +1285,11 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 1..3, - suspects: [(suspect, 1..3), (parent, 1..3)].into(), + suspects: [(parent, 1..3)].into(), }, UnblamedHunk { range_in_blamed_file: 5..6, - suspects: [(suspect, 5..6), (parent, 5..6)].into(), + suspects: [(parent, 5..6)].into(), }, UnblamedHunk { range_in_blamed_file: 6..7, @@ -1301,7 +1301,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 9..10, - suspects: [(suspect, 9..10), (parent, 6..7)].into(), + suspects: [(parent, 6..7)].into(), }, ] ); @@ -1327,7 +1327,7 @@ mod process_changes { }, UnblamedHunk { range_in_blamed_file: 4..7, - suspects: [(suspect, 4..7), (parent, 3..6)].into() + suspects: [(parent, 3..6)].into() } ] ); diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 7119bf75fa4..cdd2f5c6186 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -110,7 +110,7 @@ mod baseline { struct Fixture { odb: gix_odb::Handle, resource_cache: gix_diff::blob::Platform, - commits: Vec>, + suspect: ObjectId, } impl Fixture { @@ -137,10 +137,6 @@ impl Fixture { let head_id = reference.peel_to_id_in_place(&store, &odb)?; - let commits: Vec<_> = gix_traverse::commit::topo::Builder::from_iters(&odb, [head_id], None::>) - .build()? - .collect(); - let git_dir = worktree_path.join(".git"); let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?; let stack = gix_worktree::Stack::from_state_and_ignore_case( @@ -174,7 +170,7 @@ impl Fixture { Ok(Fixture { odb, resource_cache, - commits, + suspect: head_id, }) } } @@ -186,12 +182,13 @@ macro_rules! mktest { let Fixture { odb, mut resource_cache, - commits, + suspect, } = Fixture::new()?; let lines_blamed = gix_blame::file( &odb, - commits, + suspect, + None, &mut resource_cache, format!("{}.txt", $case).as_str().into(), None, @@ -247,12 +244,13 @@ fn diff_disparity() { let Fixture { odb, mut resource_cache, - commits, + suspect, } = Fixture::new().unwrap(); let lines_blamed = gix_blame::file( &odb, - commits, + suspect, + None, &mut resource_cache, format!("{case}.txt").as_str().into(), None, @@ -274,12 +272,19 @@ fn line_range() { let Fixture { odb, mut resource_cache, - commits, + suspect, } = Fixture::new().unwrap(); - let lines_blamed = gix_blame::file(&odb, commits, &mut resource_cache, "simple.txt".into(), Some(1..2)) - .unwrap() - .entries; + let lines_blamed = gix_blame::file( + &odb, + suspect, + None, + &mut resource_cache, + "simple.txt".into(), + Some(1..2), + ) + .unwrap() + .entries; assert_eq!(lines_blamed.len(), 2); From fd63dd6bd0574a5207c23c0303cd3e86da7cb9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Fri, 31 Jan 2025 20:50:32 +0100 Subject: [PATCH 3/4] Adapt to changes in `gix-blame` --- gitoxide-core/src/repository/blame.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index ce1ec9c5c6a..2d2930b30dd 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -35,13 +35,17 @@ pub fn blame_file( .next() .expect("exactly one pattern"); - let suspect = repo.head()?.peel_to_commit_in_place()?; - let traverse = - gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) - .with_commit_graph(repo.commit_graph_if_enabled()?) - .build()?; + let suspect: gix::ObjectId = repo.head()?.into_peeled_id()?.into(); + let cache: Option = repo.commit_graph_if_enabled()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; - let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr(), range)?; + let outcome = gix::blame::file( + &repo.objects, + suspect, + cache, + &mut resource_cache, + file.as_bstr(), + range, + )?; let statistics = outcome.statistics; write_blame_entries(out, outcome)?; From 442883800bc3abe63592ec36cb03b7c7e55c0f34 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 1 Feb 2025 16:22:49 +0100 Subject: [PATCH 4/4] refactor --- gix-blame/src/error.rs | 4 ++ gix-blame/src/file/function.rs | 91 +++++++++------------------------- 2 files changed, 27 insertions(+), 68 deletions(-) diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs index 4aa4b0ceb10..979cc8cd6ef 100644 --- a/gix-blame/src/error.rs +++ b/gix-blame/src/error.rs @@ -29,4 +29,8 @@ pub enum Error { DiffTree(#[from] gix_diff::tree::Error), #[error("Invalid line range was given, line range is expected to be a 1-based inclusive range in the format ','")] InvalidLineRange, + #[error("Failure to decode commit during traversal")] + DecodeCommit(#[from] gix_object::decode::Error), + #[error("Failed to get parent from commitgraph during traversal")] + GetParentFromCommitGraph(#[from] gix_commitgraph::file::commit::Error), } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 3cd2fab6deb..f6368dde18f 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -7,7 +7,7 @@ use gix_object::{ bstr::{BStr, BString}, FindExt, }; -use gix_traverse::commit::find; +use gix_traverse::commit::find as find_commit; use smallvec::SmallVec; use std::num::NonZeroU32; use std::ops::Range; @@ -79,13 +79,7 @@ pub fn file( commit_id: suspect, })?; let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec(); - let num_lines_in_blamed = { - let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100); - tokens_for_diffing(&blamed_file_blob) - .tokenize() - .map(|token| interner.intern(token)) - .count() - } as u32; + let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32; // Binary or otherwise empty? if num_lines_in_blamed == 0 { @@ -98,36 +92,29 @@ pub fn file( suspects: [(suspect, range_in_blamed_file)].into(), }]; - let mut buf = Vec::new(); - let commit = find(cache.as_ref(), &odb, &suspect, &mut buf)?; - + let (mut buf, mut buf2) = (Vec::new(), Vec::new()); + let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?; let mut queue: gix_revwalk::PriorityQueue = gix_revwalk::PriorityQueue::new(); - - let commit_time = commit_time(commit); - queue.insert(commit_time, suspect); + queue.insert(commit_time(commit)?, suspect); let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); let mut previous_entry: Option<(ObjectId, ObjectId)> = None; 'outer: while let Some(suspect) = queue.pop_value() { + stats.commits_traversed += 1; if hunks_to_blame.is_empty() { break; } let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.suspects.contains_key(&suspect)); - if !is_still_suspect { // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with // the next one. continue 'outer; } - stats.commits_traversed += 1; - - let commit = find(cache.as_ref(), &odb, &suspect, &mut buf)?; - - let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref()); - + let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?; + let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?; if parent_ids.is_empty() { if queue.is_empty() { // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the @@ -139,7 +126,6 @@ pub fn file( break 'outer; } } - // There is more, keep looking. continue; } @@ -639,27 +625,12 @@ fn find_path_entry_in_commit( type CommitTime = i64; -fn commit_time(commit: gix_traverse::commit::Either<'_, '_>) -> CommitTime { +fn commit_time(commit: gix_traverse::commit::Either<'_, '_>) -> Result { match commit { gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => { - let mut commit_time = 0; - for token in commit_ref_iter { - use gix_object::commit::ref_iter::Token as T; - match token { - Ok(T::Tree { .. }) => continue, - Ok(T::Parent { .. }) => continue, - Ok(T::Author { .. }) => continue, - Ok(T::Committer { signature }) => { - commit_time = signature.time.seconds; - break; - } - Ok(_unused_token) => break, - Err(_err) => todo!(), - } - } - commit_time + commit_ref_iter.committer().map(|c| c.time.seconds) } - gix_traverse::commit::Either::CachedCommit(commit) => commit.committer_timestamp() as i64, + gix_traverse::commit::Either::CachedCommit(commit) => Ok(commit.committer_timestamp() as i64), } } @@ -669,46 +640,30 @@ fn collect_parents( commit: gix_traverse::commit::Either<'_, '_>, odb: &impl gix_object::Find, cache: Option<&gix_commitgraph::Graph>, -) -> ParentIds { + buf: &mut Vec, +) -> Result { let mut parent_ids: ParentIds = Default::default(); - match commit { gix_traverse::commit::Either::CachedCommit(commit) => { let cache = cache .as_ref() .expect("find returned a cached commit, so we expect cache to be present"); - for parent_id in commit.iter_parents() { - match parent_id { - Ok(pos) => { - let parent = cache.commit_at(pos); - - parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64)); - } - Err(_) => todo!(), - } + for parent_pos in commit.iter_parents() { + let parent = cache.commit_at(parent_pos?); + parent_ids.push((parent.id().to_owned(), parent.committer_timestamp() as i64)); } } gix_traverse::commit::Either::CommitRefIter(commit_ref_iter) => { - for token in commit_ref_iter { - match token { - Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, - Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { - let mut buf = Vec::new(); - let parent = odb.find_commit_iter(id.as_ref(), &mut buf).ok(); - let parent_commit_time = parent - .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds)) - .unwrap_or_default(); - - parent_ids.push((id, parent_commit_time)); - } - Ok(_unused_token) => break, - Err(_err) => todo!(), - } + for id in commit_ref_iter.parent_ids() { + let parent = odb.find_commit_iter(id.as_ref(), buf).ok(); + let parent_commit_time = parent + .and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds)) + .unwrap_or_default(); + parent_ids.push((id, parent_commit_time)); } } }; - - parent_ids + Ok(parent_ids) } /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important