Skip to content

Commit

Permalink
feat!: generalize rename-tracking engine for later use with status.
Browse files Browse the repository at this point in the history
Previously the rename tracking engine was integrated with tree-diffs,
even though it already operated in a stand-alone fashion.
Now it's officially generalized which allows it to be tested separately
and used when tracking renames for diffs between index and tree, index
and index, and index and worktree.
  • Loading branch information
Byron committed Nov 11, 2023
1 parent e2745fd commit 089c4dc
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 734 deletions.
8 changes: 2 additions & 6 deletions gix/src/config/cache/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,9 @@ impl Cache {
}

#[cfg(feature = "blob-diff")]
pub(crate) fn diff_renames(
&self,
) -> Result<Option<crate::object::tree::diff::Rewrites>, crate::object::tree::diff::rewrites::Error> {
pub(crate) fn diff_renames(&self) -> Result<Option<crate::diff::Rewrites>, crate::diff::new_rewrites::Error> {
self.diff_renames
.get_or_try_init(|| {
crate::object::tree::diff::Rewrites::try_from_config(&self.resolved, self.lenient_config)
})
.get_or_try_init(|| crate::diff::new_rewrites(&self.resolved, self.lenient_config))
.copied()
}

Expand Down
2 changes: 1 addition & 1 deletion gix/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ pub(crate) struct Cache {
pub(crate) url_rewrite: OnceCell<crate::remote::url::Rewrite>,
/// The lazy-loaded rename information for diffs.
#[cfg(feature = "blob-diff")]
pub(crate) diff_renames: OnceCell<Option<crate::object::tree::diff::Rewrites>>,
pub(crate) diff_renames: OnceCell<Option<crate::diff::Rewrites>>,
/// A lazily loaded mapping to know which url schemes to allow
#[cfg(any(feature = "blocking-network-client", feature = "async-network-client"))]
pub(crate) url_scheme: OnceCell<crate::remote::url::SchemePermission>,
Expand Down
63 changes: 63 additions & 0 deletions gix/src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,66 @@ pub mod rename {
RenamesAndCopies,
}
}

///
#[cfg(feature = "blob-diff")]
mod utils {
    use crate::config::cache::util::ApplyLeniency;
    use crate::config::tree::Diff;
    use crate::diff::rename::Tracking;
    use gix_diff::rewrites::Copies;
    use gix_diff::Rewrites;

    ///
    pub mod new_rewrites {
        /// The error returned by [`new_rewrites()`](super::new_rewrites()).
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            #[error(transparent)]
            ConfigDiffRenames(#[from] crate::config::key::GenericError),
            #[error(transparent)]
            ConfigDiffRenameLimit(#[from] crate::config::unsigned_integer::Error),
        }
    }

    /// Assemble the rewrite-tracking settings from the `diff.renames` and `diff.renameLimit` keys of `config`.
    ///
    /// `lenient` is forwarded to `with_leniency()` for both values.
    ///
    /// Returns `Ok(None)` if `diff.renames` is not set, or if it is set but disables tracking.
    /// If only renames are tracked, `copies` is `None`; if copies are tracked as well, `copies` is `Copies::default()`.
    ///
    /// A missing `diff.renameLimit` falls back to the limit of `Rewrites::default()`, which also provides
    /// all remaining fields.
    #[allow(clippy::result_large_err)]
    pub fn new_rewrites(
        config: &gix_config::File<'static>,
        lenient: bool,
    ) -> Result<Option<Rewrites>, new_rewrites::Error> {
        let tracking = config
            .boolean_by_key("diff.renames")
            .map(|value| Diff::RENAMES.try_into_renames(value))
            .transpose()
            .with_leniency(lenient)?;

        // Without an (enabled) `diff.renames` there is nothing to configure at all.
        let copies = match tracking {
            None | Some(Tracking::Disabled) => return Ok(None),
            Some(Tracking::Renames) => None,
            Some(Tracking::RenamesAndCopies) => Some(Copies::default()),
        };

        let configured_limit = config
            .integer_by_key("diff.renameLimit")
            .map(|value| Diff::RENAME_LIMIT.try_into_usize(value))
            .transpose()
            .with_leniency(lenient)?;

        let fallback = Rewrites::default();
        let limit = configured_limit.unwrap_or(fallback.limit);
        Ok(Some(Rewrites {
            copies,
            limit,
            ..fallback
        }))
    }
}
#[cfg(feature = "blob-diff")]
pub use utils::new_rewrites;
14 changes: 1 addition & 13 deletions gix/src/object/tree/diff/change.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
use crate::diff::blob::DiffLineStats;
use crate::{bstr::BStr, Id};

/// Line statistics of the diff that was performed to detect the similarity of a [Rewrite][Event::Rewrite].
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct DiffLineStats {
/// The number of lines to remove from the source to get to the destination.
pub removals: u32,
/// The number of lines to add to the source to get to the destination.
pub insertions: u32,
/// The number of lines of the previous state, in the source.
pub before: u32,
/// The number of lines of the new state, in the destination.
pub after: u32,
}

/// An event emitted when finding differences between two trees.
#[derive(Debug, Clone, Copy)]
pub enum Event<'a, 'old, 'new> {
Expand Down
102 changes: 83 additions & 19 deletions gix/src/object/tree/diff/for_each.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
use gix_object::TreeRefIter;

use super::{change, Action, Change, Platform};
use crate::{
bstr::BStr,
ext::ObjectIdExt,
object::tree::{
diff,
diff::{rewrites, tracked},
},
Repository, Tree,
};
use crate::diff::rewrites::tracker;
use crate::{bstr::BStr, diff::rewrites, ext::ObjectIdExt, object::tree::diff, Repository, Tree};

/// The error returned by methods on the [diff platform][Platform].
#[derive(Debug, thiserror::Error)]
Expand All @@ -19,12 +12,10 @@ pub enum Error {
Diff(#[from] gix_diff::tree::changes::Error),
#[error("The user-provided callback failed")]
ForEach(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("Could not find blob for similarity checking")]
FindExistingBlob(#[from] crate::object::find::existing::Error),
#[error("Could not configure diff algorithm prior to checking similarity")]
ConfigureDiffAlgorithm(#[from] crate::config::diff::algorithm::Error),
#[error("Could not traverse tree to obtain possible sources for copies")]
TraverseTreeForExhaustiveCopyDetection(#[from] gix_traverse::tree::breadthfirst::Error),
#[error("Failure during rename tracking")]
RenameTracking(#[from] tracker::emit::Error),
}

///
Expand All @@ -49,12 +40,14 @@ impl<'a, 'old> Platform<'a, 'old> {
E: std::error::Error + Sync + Send + 'static,
{
let repo = self.lhs.repo;
let diff_algo = repo.config.diff_algorithm()?;
let mut delegate = Delegate {
src_tree: self.lhs,
other_repo: other.repo,
recorder: gix_diff::tree::Recorder::default().track_location(self.tracking),
visit: for_each,
tracked: self.rewrites.map(|r| tracked::State::new(r, self.tracking)),
location: self.tracking,
tracked: self.rewrites.map(|r| rewrites::Tracker::new(r, diff_algo)),
err: None,
};
match gix_diff::tree::Changes::from(TreeRefIter::from_bytes(&self.lhs.data)).needed_to_obtain(
Expand Down Expand Up @@ -87,7 +80,8 @@ struct Delegate<'a, 'old, 'new, VisitFn, E> {
other_repo: &'new Repository,
recorder: gix_diff::tree::Recorder,
visit: VisitFn,
tracked: Option<tracked::State>,
tracked: Option<rewrites::Tracker<gix_diff::tree::visit::Change>>,
location: Option<gix_diff::tree::recorder::Location>,
err: Option<E>,
}

Expand Down Expand Up @@ -151,14 +145,14 @@ where
location: dest.location,
event: diff::change::Event::Rewrite {
source_location: source.location,
source_entry_mode: source.mode,
source_entry_mode: source.entry_mode,
source_id: source.id.attach(self.src_tree.repo),
entry_mode: mode,
id: oid.to_owned().attach(self.other_repo),
diff: source.diff,
copy: match source.kind {
tracked::visit::Kind::RenameTarget => false,
tracked::visit::Kind::CopyDestination => true,
tracker::visit::SourceKind::Rename => false,
tracker::visit::SourceKind::Copy => true,
},
},
};
Expand All @@ -180,7 +174,12 @@ where
&mut self.err,
),
},
self.src_tree,
&self.src_tree.repo.objects,
|push| {
self.src_tree
.traverse()
.breadthfirst(&mut tree_to_changes::Delegate::new(push, self.location))
},
)?;
Ok(Some(outcome))
}
Expand Down Expand Up @@ -233,3 +232,68 @@ where
}
}
}

/// Adapts a tree traversal so that every blob entry is handed to a callback as a [`Change`](gix_diff::tree::visit::Change).
mod tree_to_changes {
    use gix_diff::tree::recorder::Location as DiffLocation;
    use gix_diff::tree::visit::Change;
    use gix_object::tree::EntryRef;
    use gix_traverse::tree::recorder::Location as TraverseLocation;
    use gix_traverse::tree::visit::Action;

    use crate::bstr::BStr;

    /// A traversal visitor that forwards each visited blob, along with its recorded path, to `push`.
    pub struct Delegate<'a> {
        /// Receives one change per blob entry, together with the path the recorder currently tracks.
        push: &'a mut dyn FnMut(Change, &BStr),
        /// Keeps track of the path of the entry that is currently visited.
        recorder: gix_traverse::tree::Recorder,
    }

    impl<'a> Delegate<'a> {
        /// Create a new instance that calls `push` for each blob, tracking paths as configured by `location`.
        ///
        /// `location` uses the diff recorder's type and is translated variant by variant to the traversal
        /// recorder's equivalent; `None` stays `None`.
        pub fn new(
            push: &'a mut dyn FnMut(Change, &BStr),
            location: Option<gix_diff::tree::recorder::Location>,
        ) -> Self {
            let location = match location {
                Some(DiffLocation::FileName) => Some(TraverseLocation::FileName),
                Some(DiffLocation::Path) => Some(TraverseLocation::Path),
                None => None,
            };
            let recorder = gix_traverse::tree::Recorder::default().track_location(location);
            Self { push, recorder }
        }
    }

    // All path bookkeeping is delegated to the wrapped recorder; only the `visit_*` methods differ.
    impl gix_traverse::tree::Visit for Delegate<'_> {
        fn pop_front_tracked_path_and_set_current(&mut self) {
            self.recorder.pop_front_tracked_path_and_set_current()
        }

        fn push_back_tracked_path_component(&mut self, component: &BStr) {
            self.recorder.push_back_tracked_path_component(component)
        }

        fn push_path_component(&mut self, component: &BStr) {
            self.recorder.push_path_component(component)
        }

        fn pop_path_component(&mut self) {
            self.recorder.pop_path_component();
        }

        /// Trees themselves are never pushed; traversal always continues.
        fn visit_tree(&mut self, _entry: &EntryRef<'_>) -> Action {
            Action::Continue
        }

        /// Push blob entries as a `Modification` whose previous id is the null id of the entry's hash kind
        /// and whose previous mode equals the current one. Non-blob entries are ignored.
        ///
        /// NOTE(review): presumably the receiver only looks at the current id and mode of these entries - confirm.
        fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action {
            if !entry.mode.is_blob() {
                return Action::Continue;
            }

            let change = Change::Modification {
                previous_entry_mode: entry.mode,
                previous_oid: gix_hash::ObjectId::null(entry.oid.kind()),
                entry_mode: entry.mode,
                oid: entry.oid.to_owned(),
            };
            (self.push)(change, self.recorder.path());
            Action::Continue
        }
    }
}
31 changes: 2 additions & 29 deletions gix/src/object/tree/diff/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use gix_diff::tree::recorder::Location;

use crate::diff::Rewrites;
use crate::{bstr::BStr, Tree};

/// Returned by the `for_each` function to control flow.
Expand Down Expand Up @@ -39,7 +40,7 @@ impl<'repo> Tree<'repo> {
/// try to access blobs to compute a similarity metric. Thus, it's more compatible to turn rewrite tracking off
/// using [`Platform::track_rewrites()`].
#[allow(clippy::result_large_err)]
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, rewrites::Error> {
pub fn changes<'a>(&'a self) -> Result<Platform<'a, 'repo>, crate::diff::new_rewrites::Error> {
Ok(Platform {
state: Default::default(),
lhs: self,
Expand All @@ -58,34 +59,6 @@ pub struct Platform<'a, 'repo> {
rewrites: Option<Rewrites>,
}

/// A structure to capture how to perform rename and copy tracking.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Rewrites {
/// If `Some(…)`, also find copies. `None` is the default, which does not try to detect copies at all.
///
/// Note that this is an even more expensive operation than detecting renames as files.
pub copies: Option<rewrites::Copies>,
/// The percentage of similarity needed for files to be considered renamed, defaulting to `Some(0.5)`.
/// This field is similar to `git diff -M50%`.
///
/// If `None`, files are only considered equal if their content matches 100%.
/// Note that values greater than 1.0 have no different effect than 1.0.
pub percentage: Option<f32>,
/// The number of files to consider for fuzzy rename or copy tracking. Defaults to 1000, meaning that only 1000*1000
/// combinations can be tested for fuzzy matches, i.e. the ones that try to find matches by comparing similarity.
/// If 0, there is no limit.
///
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
/// run the fuzzy version of identity tests at all. That way results are never partial.
pub limit: usize,
}

///
pub mod rewrites;

/// types to actually perform rename tracking.
pub(crate) mod tracked;

/// Configuration
impl<'a, 'repo> Platform<'a, 'repo> {
/// Keep track of file-names, which makes the [`location`][Change::location] field usable with the filename of the changed item.
Expand Down
Loading

0 comments on commit 089c4dc

Please sign in to comment.