Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start blame from cache #1852

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gitoxide-core/src/repository/blame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub fn blame_file(
suspect,
cache,
&mut resource_cache,
None,
file.as_bstr(),
range,
)?;
Expand Down
77 changes: 57 additions & 20 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use super::{process_changes, Change, UnblamedHunk};
use super::{process_changes, update_blame_with_changes, Change, UnblamedHunk};
use crate::types::BlameCacheObject;
use crate::{BlameEntry, Error, Outcome, Statistics};
use gix_diff::blob::intern::TokenSource;
use gix_diff::tree::Visit;
use gix_hash::ObjectId;
use gix_object::{
bstr::{BStr, BString},
bstr::{BStr, BString, ByteSlice},
FindExt,
};
use gix_traverse::commit::find as find_commit;
Expand Down Expand Up @@ -66,26 +67,23 @@ pub fn file(
suspect: ObjectId,
cache: Option<gix_commitgraph::Graph>,
resource_cache: &mut gix_diff::blob::Platform,
blame_cache: Option<BlameCacheObject>,
file_path: &BStr,
range: Option<Range<u32>>,
) -> Result<Outcome, Error> {
let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);

let mut stats = Statistics::default();
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
let blamed_file_entry_id = find_path_entry_in_commit(
&odb,
&suspect,
file_path,
cache.as_ref(),
&mut buf,
&mut buf2,
&mut stats,
)?
.ok_or_else(|| Error::FileMissing {
file_path: file_path.to_owned(),
commit_id: suspect,
})?;
let mut file_id = |commit, buf: &mut Vec<u8>, buf2: &mut Vec<u8>| {
find_path_entry_in_commit(&odb, commit, file_path, cache.as_ref(), buf, buf2, &mut stats)?.ok_or_else(|| {
Error::FileMissing {
file_path: file_path.to_owned(),
commit_id: suspect,
}
})
};
let blamed_file_entry_id = file_id(&suspect, &mut buf, &mut buf2)?;
let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
let num_lines_in_blamed = tokens_for_diffing(&blamed_file_blob).tokenize().count() as u32;

Expand All @@ -95,17 +93,56 @@ pub fn file(
}

let range_in_blamed_file = one_based_inclusive_to_zero_based_exclusive_range(range, num_lines_in_blamed)?;
let mut hunks_to_blame = vec![UnblamedHunk {
range_in_blamed_file: range_in_blamed_file.clone(),
suspects: [(suspect, range_in_blamed_file)].into(),
}];

let (blame_entries, mut hunks_to_blame) = match blame_cache {
Some(blame_cache) => {
// If there is a cache, we first get the diff between the current commit and the commit
// we passed as the cache.
let old_file_id = file_id(&blame_cache.cache_id, &mut buf, &mut buf2)?;
let changes = blob_changes(
&odb,
resource_cache,
blamed_file_entry_id,
old_file_id,
file_path.as_bstr(),
&mut stats,
)?;

// If there are no changes, we can return the cache as is immediately.
if changes.iter().all(|change| matches!(change, Change::Unchanged(_))) {
return Ok(Outcome {
entries: blame_cache.entries.clone(),
blob: blamed_file_blob,
statistics: stats,
});
}
// Otherwise, we update the cache with the new changes.
let (blame_entries, hunks_to_blame) = update_blame_with_changes(blame_cache.entries, changes, suspect);
// If there are no more hunks to blame, we can return the result immediately.
if hunks_to_blame.is_empty() {
return Ok(Outcome {
entries: blame_entries,
blob: blamed_file_blob,
statistics: stats,
});
}
(blame_entries, hunks_to_blame)
}
None => {
let hunks_to_blame = vec![UnblamedHunk {
range_in_blamed_file: range_in_blamed_file.clone(),
suspects: [(suspect, range_in_blamed_file)].into(),
}];
(Vec::new(), hunks_to_blame)
}
};

let (mut buf, mut buf2) = (Vec::new(), Vec::new());
let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
let mut queue: gix_revwalk::PriorityQueue<CommitTime, ObjectId> = gix_revwalk::PriorityQueue::new();
queue.insert(commit_time(commit)?, suspect);

let mut out = Vec::new();
let mut out = blame_entries;
let mut diff_state = gix_diff::tree::State::default();
let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
'outer: while let Some(suspect) = queue.pop_value() {
Expand Down
143 changes: 142 additions & 1 deletion gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::ops::Range;

use gix_hash::ObjectId;

use crate::types::{BlameEntry, Either, LineRange};
use crate::types::{BlameEntry, BlameLines, ChangeLines, Either, LineRange};
use crate::types::{Change, Offset, UnblamedHunk};

pub(super) mod function;
Expand Down Expand Up @@ -357,6 +357,147 @@ fn process_changes(
new_hunks_to_blame
}

/// Consume `cached_blames` and `changes`. With the changes we update the cached blames.
/// This function returns the updated blames and the new hunks to blame.
fn update_blame_with_changes(
cached_blames: Vec<BlameEntry>,
changes: Vec<Change>,
head_id: ObjectId,
) -> (Vec<BlameEntry>, Vec<UnblamedHunk>) {
fn blame_fully_contained_by_change(
blame_lines: &BlameLines,
blame: &BlameEntry,
change_lines: &ChangeLines,
change: &Change,
) -> bool {
blame_lines.get_remaining(blame) < change_lines.get_remaining(change)
}

let mut updated_blames = Vec::new();
let mut new_hunks_to_blame = Vec::new();

let mut blame_iter = cached_blames.into_iter().peekable();

// This is a nested loop where we iterate over the changes and the blames.
// We keep track of the assigned lines in the change and the blame.
// For each of the three possible cases (Unchanged, Deleted, AddedOrReplaced) we have different
// rules for how to update the blame.
'change: for change in changes {
let mut change_assigned = ChangeLines::default();
while let Some(blame) = blame_iter.peek_mut() {
let mut blame_assigned = BlameLines::default();

// For each of the three cases we have to check if the blame is fully contained by the change.
// If so we can update the blame with the remaining length of the blame.
// If not we have to update the blame with the remaining length of the change.
match change {
Change::Unchanged(ref range) => {
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
true => {
updated_blames.push(BlameEntry {
start_in_blamed_file: range.start + change_assigned.assigned.get_assigned(),
start_in_source_file: blame.start_in_source_file,
len: blame.len,
commit_id: blame.commit_id,
});

change_assigned.assigned.add_assigned(blame.len.get());
blame_assigned.assigned.add_assigned(blame.len.get());
}
false => {
updated_blames.push(BlameEntry {
start_in_blamed_file: range.start + change_assigned.assigned.get_assigned(),
start_in_source_file: blame.start_in_source_file,
len: NonZeroU32::new(change_assigned.get_remaining(&change)).unwrap(),
commit_id: blame.commit_id,
});

blame_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
change_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
}
}
}
Change::Deleted(_start_deletion, _lines_deleted) => {
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
true => {
blame_assigned.assigned.add_assigned(blame.len.get());
change_assigned.assigned.add_assigned(blame.len.get());
}
false => {
blame_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
change_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
}
}
}
Change::AddedOrReplaced(ref range, lines_deleted) => {
let new_unblamed_hunk = |range: &Range<u32>, head_id: ObjectId| UnblamedHunk {
range_in_blamed_file: range.clone(),
suspects: [(head_id, range.clone())].into(),
};
match blame_fully_contained_by_change(&blame_assigned, blame, &change_assigned, &change) {
true => {
if lines_deleted == 0 {
new_hunks_to_blame.push(new_unblamed_hunk(range, head_id));
}

change_assigned.assigned.add_assigned(blame.len.get());
blame_assigned.assigned.add_assigned(blame.len.get());
}
false => {
new_hunks_to_blame.push(new_unblamed_hunk(range, head_id));

blame_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
change_assigned
.assigned
.add_assigned(change_assigned.get_remaining(&change));
}
}
}
}

// Check if the blame or the change is fully assigned.
// If the blame is fully assigned we can continue with the next blame.
// If the change is fully assigned we can continue with the next change.
// Since we have a mutable reference to the blame we can update it and reset the assigned blame lines.
// If both are fully assigned we can continue with the next blame and change.
match (
blame_assigned.has_remaining(blame),
change_assigned.has_remaining(&change),
) {
(true, true) => {
// Both have remaining
blame.update_blame(&blame_assigned.assigned);
}
(true, false) => {
// Change is fully assigned
blame.update_blame(&blame_assigned.assigned);
continue 'change;
}
(false, true) => {
// Blame is fully assigned
blame_iter.next();
}
(false, false) => {
// Both are fully assigned
blame_iter.next();
continue 'change;
}
};
}
}
(updated_blames, new_hunks_to_blame)
}

impl UnblamedHunk {
fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self {
self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset));
Expand Down
Loading
Loading