Skip to content

Commit

Permalink
Rework how blame is passed to parents
Browse files Browse the repository at this point in the history
Before this commit, blame would have been passed to the first parent in
cases where there was more than one.

This commit changes that by only removing a particular suspect from
further consideration once it has been compared to all of its parents.
For hunks where blame cannot be passed to any parent, we can safely
assume that they were introduced by a particular suspect, so we remove
those hunks from `hunks_to_blame` and create a `BlameEntry` out of them.

We can illustrate the change using the following small example history:

```text
---1----2
    \    \
     3----4---
```

Let’s now say that we’re blaming a file that has the following content:

```text
line 1 # introduced by (1), then changed by (3)
line 2 # introduced by (1)
line 3 # introduced by (1), then changed by (2)
```

The resulting blame should look like this:

```text
(3) line 1
(1) line 2
(2) line 3
```

The previous version of the algorithm would have passed blame to just
(2) or (3), depending on which one came first in the list of parents.
  • Loading branch information
cruessler committed Jan 9, 2025
1 parent 8d84818 commit c8c1b63
Show file tree
Hide file tree
Showing 5 changed files with 500 additions and 517 deletions.
60 changes: 53 additions & 7 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,10 @@ where
if more_than_one_parent {
// None of the changes affected the file we’re currently blaming.
// Copy blame to parent.
for unblamed_hunk in &mut hunks_to_blame {
unblamed_hunk.clone_blame(suspect, parent_id);
}
hunks_to_blame = hunks_to_blame
.into_iter()
.map(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id))
.collect();
} else {
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
}
Expand All @@ -196,14 +197,59 @@ where
}
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?;
hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect);
pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame);
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
}
}
}
if more_than_one_parent {
for unblamed_hunk in &mut hunks_to_blame {

hunks_to_blame = hunks_to_blame
.into_iter()
.filter_map(|mut unblamed_hunk| {
if unblamed_hunk.suspects.len() == 1 {
if let Some(entry) = BlameEntry::from_unblamed_hunk(&unblamed_hunk, suspect) {
// At this point, we have copied blame for every hunk to a parent. Hunks
// that have only `suspect` left in `suspects` have not passed blame to any
// parent and so they can be converted to a `BlameEntry` and moved to
// `out`.
out.push(entry);

return None;
}
}

unblamed_hunk.remove_blame(suspect);

Some(unblamed_hunk)
})
.collect();

// This block asserts that line ranges for each suspect never overlap. If they did overlap
// this would mean that the same line in a *Source File* would map to more than one line in
// the *Blamed File* and this is not possible.
#[cfg(debug_assertions)]
{
let ranges = hunks_to_blame.iter().fold(
std::collections::BTreeMap::<ObjectId, Vec<Range<u32>>>::new(),
|mut acc, hunk| {
for (suspect, range) in hunk.suspects.clone() {
acc.entry(suspect).or_insert(Vec::new()).push(range);
}

acc
},
);

for (_, mut ranges) in ranges {
ranges.sort_by(|a, b| a.start.cmp(&b.start));

for window in ranges.windows(2) {
match window {
[a, b] => {
assert!(a.end <= b.start, "#{hunks_to_blame:#?}");
}
_ => {}
}
}
}
}
}
Expand Down
119 changes: 37 additions & 82 deletions gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ pub(super) mod function;
///
/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*.
fn process_change(
out: &mut Vec<BlameEntry>,
new_hunks_to_blame: &mut Vec<UnblamedHunk>,
offset: &mut Offset,
suspect: ObjectId,
parent: ObjectId,
hunk: Option<UnblamedHunk>,
change: Option<Change>,
) -> (Option<UnblamedHunk>, Option<Change>) {
Expand All @@ -40,6 +40,8 @@ fn process_change(
match (hunk, change) {
(Some(hunk), Some(Change::Unchanged(unchanged))) => {
let Some(range_in_suspect) = hunk.suspects.get(&suspect) else {
// We don’t clone blame to `parent` as `suspect` has nothing to do with this
// `hunk`.
new_hunks_to_blame.push(hunk);
return (None, Some(Change::Unchanged(unchanged)));
};
Expand All @@ -64,7 +66,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
(false, false) => {
Expand Down Expand Up @@ -93,7 +95,7 @@ fn process_change(

// Nothing to do with `hunk` except shifting it,
// but `unchanged` needs to be checked against the next hunk to catch up.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, Some(Change::Unchanged(unchanged)))
}
}
Expand Down Expand Up @@ -123,7 +125,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// requeue the left side `before` after offsetting it…
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// …and treat `after` as `new_hunk`, which contains the `added` range.
after
}
Expand All @@ -132,20 +134,18 @@ fn process_change(
*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

// The overlapping `added` section was successfully located.
out.push(BlameEntry::with_offset(
added.clone(),
suspect,
hunk_starting_at_added.offset_for(suspect),
));

// The overlapping `added` section was successfully located.
// Re-split at the end of `added` to continue with what's after.
match hunk_starting_at_added.split_at(suspect, added.end) {
Either::Left(_) => {
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

// Nothing to split, so we are done with this hunk.
(None, None)
}
Either::Right((_, after)) => {
Either::Right((hunk, after)) => {
new_hunks_to_blame.push(hunk);

// Keep processing the unblamed range after `added`
(Some(after), None)
}
Expand All @@ -162,17 +162,13 @@ fn process_change(
Either::Left(hunk) => hunk,
Either::Right((before, after)) => {
// Keep looking for the left side of the unblamed portion.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
after
}
};

// We can 'blame' the overlapping area of `added` and `hunk`.
out.push(BlameEntry::with_offset(
added.start..range_in_suspect.end,
suspect,
hunk_starting_at_added.offset_for(suspect),
));
new_hunks_to_blame.push(hunk_starting_at_added);
// Keep processing `added`, it's portion past `hunk` may still contribute.
(None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted)))
}
Expand All @@ -183,18 +179,20 @@ fn process_change(
// <---> (blamed)
// <--> (new hunk)

out.push(BlameEntry::with_offset(
range_in_suspect.start..added.end,
suspect,
hunk.offset_for(suspect),
));

*offset += added.end - added.start;
*offset -= number_of_lines_deleted;

match hunk.split_at(suspect, added.end) {
Either::Left(_) => (None, None),
Either::Right((_, after)) => (Some(after), None),
Either::Left(hunk) => {
new_hunks_to_blame.push(hunk);

(None, None)
}
Either::Right((before, after)) => {
new_hunks_to_blame.push(before);

(Some(after), None)
}
}
}
(false, false) => {
Expand Down Expand Up @@ -222,7 +220,7 @@ fn process_change(
// <----> (added)

// Retry `hunk` once there is overlapping changes to process.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

// Let hunks catchup with this change.
(
Expand All @@ -237,11 +235,7 @@ fn process_change(
// <---> (blamed)

// Successfully blame the whole range.
out.push(BlameEntry::with_offset(
range_in_suspect.clone(),
suspect,
hunk.offset_for(suspect),
));
new_hunks_to_blame.push(hunk);

// And keep processing `added` with future `hunks` that might be affected by it.
(
Expand Down Expand Up @@ -279,7 +273,7 @@ fn process_change(
}
Either::Right((before, after)) => {
// `before` isn't affected by deletion, so keep it for later.
new_hunks_to_blame.push(before.shift_by(suspect, *offset));
new_hunks_to_blame.push(before.clone_blame(suspect, parent).shift_by(parent, *offset));
// after will be affected by offset, and we will see if there are more changes affecting it.
after
}
Expand All @@ -291,7 +285,8 @@ fn process_change(
// | (line_number_in_destination)

// Catchup with changes.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));

(
None,
Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)),
Expand All @@ -300,7 +295,7 @@ fn process_change(
}
(Some(hunk), None) => {
// nothing to do - changes are exhausted, re-evaluate `hunk`.
new_hunks_to_blame.push(hunk.shift_by(suspect, *offset));
new_hunks_to_blame.push(hunk.clone_blame(suspect, parent).shift_by(parent, *offset));
(None, None)
}
(None, Some(Change::Unchanged(_))) => {
Expand Down Expand Up @@ -328,10 +323,10 @@ fn process_change(
/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other.
/// Once a match is found, it's pushed onto `out`.
fn process_changes(
out: &mut Vec<BlameEntry>,
hunks_to_blame: Vec<UnblamedHunk>,
changes: Vec<Change>,
suspect: ObjectId,
parent: ObjectId,
) -> Vec<UnblamedHunk> {
let mut hunks_iter = hunks_to_blame.into_iter();
let mut changes_iter = changes.into_iter();
Expand All @@ -344,10 +339,10 @@ fn process_changes(

loop {
(hunk, change) = process_change(
out,
&mut new_hunks_to_blame,
&mut offset_in_destination,
suspect,
parent,
hunk,
change,
);
Expand Down Expand Up @@ -407,30 +402,20 @@ impl UnblamedHunk {
}
}

fn offset_for(&self, suspect: ObjectId) -> Offset {
let range_in_suspect = self
.suspects
.get(&suspect)
.expect("Internal and we know suspect is present");

if self.range_in_blamed_file.start > range_in_suspect.start {
Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start)
} else {
Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start)
}
}

/// Transfer all ranges from the commit at `from` to the commit at `to`.
fn pass_blame(&mut self, from: ObjectId, to: ObjectId) {
if let Some(range_in_suspect) = self.suspects.remove(&from) {
self.suspects.insert(to, range_in_suspect);
}
}

fn clone_blame(&mut self, from: ObjectId, to: ObjectId) {
// TODO
// Should this also accept `&mut self` as the other functions do?
fn clone_blame(mut self, from: ObjectId, to: ObjectId) -> Self {
if let Some(range_in_suspect) = self.suspects.get(&from) {
self.suspects.insert(to, range_in_suspect.clone());
}
self
}

fn remove_blame(&mut self, suspect: ObjectId) {
Expand All @@ -439,36 +424,6 @@ impl UnblamedHunk {
}

impl BlameEntry {
/// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`.
fn with_offset(range_in_source_file: Range<u32>, commit_id: ObjectId, offset: Offset) -> Self {
debug_assert!(
range_in_source_file.end > range_in_source_file.start,
"{range_in_source_file:?}"
);

match offset {
Offset::Added(added) => Self {
start_in_blamed_file: range_in_source_file.start + added,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
},
Offset::Deleted(deleted) => {
debug_assert!(
range_in_source_file.start >= deleted,
"{range_in_source_file:?} {offset:?}"
);

Self {
start_in_blamed_file: range_in_source_file.start - deleted,
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
}
}
}
}

/// Create an offset from a portion of the *Blamed File*.
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option<Self> {
let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?;
Expand Down
Loading

0 comments on commit c8c1b63

Please sign in to comment.