From a8673224e8eca01b2d73088775efa68cdc5fdb1b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 29 Dec 2024 08:36:39 +0100 Subject: [PATCH] feat: add `index()` to diff two indices It comes with pathspec support to allow for easier integration into the `status()` machinery. --- Cargo.lock | 5 + gix-diff/Cargo.toml | 11 +- gix-diff/src/index/change.rs | 197 +++ gix-diff/src/index/function.rs | 324 ++++ gix-diff/src/index/mod.rs | 141 ++ gix-diff/src/lib.rs | 6 + gix-diff/tests/Cargo.toml | 5 +- gix-diff/tests/diff/index.rs | 1367 +++++++++++++++++ gix-diff/tests/diff/main.rs | 1 + .../make_diff_for_rewrites_repo.tar | Bin 354816 -> 368128 bytes .../fixtures/make_diff_for_rewrites_repo.sh | 14 + 11 files changed, 2067 insertions(+), 4 deletions(-) create mode 100644 gix-diff/src/index/change.rs create mode 100644 gix-diff/src/index/function.rs create mode 100644 gix-diff/src/index/mod.rs create mode 100644 gix-diff/tests/diff/index.rs diff --git a/Cargo.lock b/Cargo.lock index fe6eaeae126..fbf3e71b887 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1715,12 +1715,15 @@ dependencies = [ "bstr", "document-features", "getrandom", + "gix-attributes 0.23.1", "gix-command", "gix-filter", "gix-fs 0.12.1", "gix-hash 0.15.1", + "gix-index 0.37.0", "gix-object 0.46.1", "gix-path 0.10.13", + "gix-pathspec", "gix-tempfile 15.0.0", "gix-trace 0.1.11", "gix-traverse 0.43.1", @@ -1738,8 +1741,10 @@ dependencies = [ "gix-filter", "gix-fs 0.12.1", "gix-hash 0.15.1", + "gix-index 0.37.0", "gix-object 0.46.1", "gix-odb", + "gix-pathspec", "gix-testtools", "gix-traverse 0.43.1", "gix-worktree 0.38.0", diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 70a48195093..e3de39938c3 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -13,11 +13,13 @@ rust-version = "1.65" autotests = false [features] -default = ["blob"] -## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. 
+default = ["blob", "index"] +## Enable diffing of blobs using imara-diff. blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-traverse"] +## Enable diffing of two indices, which also allows for a generic rewrite tracking implementation. +index = ["dep:gix-index", "dep:gix-pathspec", "dep:gix-attributes"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] +serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-index?/serde"] ## Make it possible to compile to the `wasm32-unknown-unknown` target. wasm = ["dep:getrandom"] @@ -25,6 +27,9 @@ wasm = ["dep:getrandom"] doctest = false [dependencies] +gix-index = { version = "^0.37.0", path = "../gix-index", optional = true } +gix-pathspec = { version = "^0.8.1", path = "../gix-pathspec", optional = true } +gix-attributes = { version = "^0.23.1", path = "../gix-attributes", optional = true } gix-hash = { version = "^0.15.1", path = "../gix-hash" } gix-object = { version = "^0.46.1", path = "../gix-object" } gix-filter = { version = "^0.16.0", path = "../gix-filter", optional = true } diff --git a/gix-diff/src/index/change.rs b/gix-diff/src/index/change.rs new file mode 100644 index 00000000000..da5d98d1f4e --- /dev/null +++ b/gix-diff/src/index/change.rs @@ -0,0 +1,197 @@ +use crate::index::{Change, ChangeRef}; +use crate::rewrites; +use crate::rewrites::tracker::ChangeKind; +use crate::tree::visit::Relation; +use bstr::BStr; +use gix_object::tree; +use std::borrow::Cow; + +impl ChangeRef<'_, '_> { + /// Copy everything into an owned version of this instance. 
+ pub fn into_owned(self) -> Change { + match self { + ChangeRef::Addition { + location, + index, + entry_mode, + id, + } => ChangeRef::Addition { + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Deletion { + location, + index, + entry_mode, + id, + } => ChangeRef::Deletion { + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Modification { + location, + previous_index, + previous_entry_mode, + previous_id, + index, + entry_mode, + id, + } => ChangeRef::Modification { + location: Cow::Owned(location.into_owned()), + previous_index, + previous_entry_mode, + previous_id: Cow::Owned(previous_id.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Rewrite { + source_location, + source_index, + source_entry_mode, + source_id, + location, + index, + entry_mode, + id, + copy, + } => ChangeRef::Rewrite { + source_location: Cow::Owned(source_location.into_owned()), + source_index, + source_entry_mode, + source_id: Cow::Owned(source_id.into_owned()), + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + copy, + }, + ChangeRef::Unmerged { + location, + stage, + index, + entry_mode, + id, + } => ChangeRef::Unmerged { + location: Cow::Owned(location.into_owned()), + stage, + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + } + } +} + +impl ChangeRef<'_, '_> { + /// Return all shared fields among all variants: `(location, index, entry_mode, id)` + /// + /// In case of rewrites, the fields refer to the current change. + pub fn fields(&self) -> (&BStr, usize, gix_index::entry::Mode, &gix_hash::oid) { + match self { + ChangeRef::Addition { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Deletion { + location, + index, + entry_mode, + id, + .. 
+ } + | ChangeRef::Modification { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Rewrite { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Unmerged { + location, + index, + entry_mode, + id, + .. + } => (location.as_ref(), *index, *entry_mode, id), + } + } +} + +impl rewrites::tracker::Change for ChangeRef<'_, '_> { + fn id(&self) -> &gix_hash::oid { + match self { + ChangeRef::Addition { id, .. } | ChangeRef::Deletion { id, .. } | ChangeRef::Modification { id, .. } => { + id.as_ref() + } + ChangeRef::Rewrite { .. } | ChangeRef::Unmerged { .. } => { + unreachable!("BUG") + } + } + } + + fn relation(&self) -> Option { + None + } + + fn kind(&self) -> ChangeKind { + match self { + ChangeRef::Addition { .. } => ChangeKind::Addition, + ChangeRef::Deletion { .. } => ChangeKind::Deletion, + ChangeRef::Modification { .. } => ChangeKind::Modification, + ChangeRef::Rewrite { .. } => { + unreachable!("BUG: rewrites can't be determined ahead of time") + } + ChangeRef::Unmerged { .. } => { + unreachable!("BUG: unmerged don't participate in rename tracking") + } + } + } + + fn entry_mode(&self) -> tree::EntryMode { + match self { + ChangeRef::Addition { entry_mode, .. } + | ChangeRef::Deletion { entry_mode, .. } + | ChangeRef::Modification { entry_mode, .. } + | ChangeRef::Rewrite { entry_mode, .. } + | ChangeRef::Unmerged { entry_mode, .. } => { + entry_mode + .to_tree_entry_mode() + // Default is for the impossible case - just don't let it participate in rename tracking. + .unwrap_or(tree::EntryKind::Tree.into()) + } + } + } + + fn id_and_entry_mode(&self) -> (&gix_hash::oid, tree::EntryMode) { + match self { + ChangeRef::Addition { id, entry_mode, .. } + | ChangeRef::Deletion { id, entry_mode, .. } + | ChangeRef::Modification { id, entry_mode, .. } + | ChangeRef::Rewrite { id, entry_mode, .. } + | ChangeRef::Unmerged { id, entry_mode, .. 
} => { + ( + id, + entry_mode + .to_tree_entry_mode() + // Default is for the impossible case - just don't let it participate in rename tracking. + .unwrap_or(tree::EntryKind::Tree.into()), + ) + } + } + } +} diff --git a/gix-diff/src/index/function.rs b/gix-diff/src/index/function.rs new file mode 100644 index 00000000000..bbf2a9a3140 --- /dev/null +++ b/gix-diff/src/index/function.rs @@ -0,0 +1,324 @@ +use super::{Action, ChangeRef, Error, RewriteOptions}; +use crate::rewrites; +use bstr::{BStr, BString, ByteSlice}; +use gix_filter::attributes::glob::pattern::Case; +use std::borrow::Cow; +use std::cell::RefCell; +use std::cmp::Ordering; + +/// Produce an entry-by-entry diff between `lhs` and `rhs`, sending changes to `cb(change) -> Action` for consumption, +/// which would turn `lhs` into `rhs` if applied. +/// Use `pathspec` to reduce the set of entries to look at, and `pathspec_attributes` may be used by pathspecs that perform +/// attribute lookups. +/// +/// If `cb` indicated that the operation should be cancelled, no error is triggered as this isn't supposed to +/// occur through user-interaction - this diff is typically too fast. +/// +/// Note that rewrites will be emitted at the end, so no ordering can be assumed. They will only be tracked if +/// `rewrite_options` is `Some`. Note that the set of entries participating in rename tracking is affected by `pathspec`. +/// +/// Return the outcome of the rewrite tracker if it was enabled. +/// +/// Note that only `rhs` may contain unmerged entries, as `rhs` is expected to be the index read from `.git/index`. +/// Unmerged entries are always provided as changes, one stage at a time, up to three stages for *base*, *ours* and *theirs*. +/// Conceptually, `rhs` is *ours*, and `lhs` is *theirs*. +/// The entries in `lhs` and `rhs` are both expected to be sorted like index entries are typically sorted. +/// +/// Note that sparse indices aren't supported, they must be "unsparsed" before. 
+pub fn diff<'rhs, 'lhs: 'rhs, E, Find>( + lhs: &'lhs gix_index::State, + rhs: &'rhs gix_index::State, + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + rewrite_options: Option>, + pathspec: &mut gix_pathspec::Search, + pathspec_attributes: &mut dyn FnMut(&BStr, Case, bool, &mut gix_attributes::search::Outcome) -> bool, +) -> Result, Error> +where + E: Into>, + Find: gix_object::FindObjectOrHeader, +{ + if lhs.is_sparse() || rhs.is_sparse() { + return Err(Error::IsSparse); + } + if lhs + .entries() + .iter() + .any(|e| e.stage() != gix_index::entry::Stage::Unconflicted) + { + return Err(Error::LhsHasUnmerged); + } + + let lhs_range = lhs + .prefixed_entries_range(pathspec.common_prefix()) + .unwrap_or_else(|| 0..lhs.entries().len()); + let rhs_range = rhs + .prefixed_entries_range(pathspec.common_prefix()) + .unwrap_or_else(|| 0..rhs.entries().len()); + + let pattern_matches = RefCell::new(|relative_path, entry: &gix_index::Entry| { + pathspec + .pattern_matching_relative_path(relative_path, Some(entry.mode.is_submodule()), pathspec_attributes) + .map_or(false, |m| !m.is_excluded()) + }); + + let (mut lhs_iter, mut rhs_iter) = ( + lhs.entries()[lhs_range.clone()] + .iter() + .enumerate() + .map(|(idx, e)| (idx + lhs_range.start, e.path(lhs), e)) + .filter(|(_, path, e)| pattern_matches.borrow_mut()(path, e)), + rhs.entries()[rhs_range.clone()] + .iter() + .enumerate() + .map(|(idx, e)| (idx + rhs_range.start, e.path(rhs), e)) + .filter(|(_, path, e)| pattern_matches.borrow_mut()(path, e)), + ); + + let mut conflicting_paths = Vec::::new(); + let mut cb = move |change: ChangeRef<'lhs, 'rhs>| { + let (location, ..) = change.fields(); + if let ChangeRef::Unmerged { .. 
} = &change { + if let Err(insert_idx) = conflicting_paths.binary_search_by(|p| p.as_bstr().cmp(location)) { + conflicting_paths.insert(insert_idx, location.to_owned()); + } + cb(change) + } else if conflicting_paths + .binary_search_by(|p| p.as_bstr().cmp(location)) + .is_err() + { + cb(change) + } else { + Ok(Action::Continue) + } + }; + let mut resource_cache_storage = None; + let mut tracker = rewrite_options.map( + |RewriteOptions { + resource_cache, + rewrites, + find, + }| { + resource_cache_storage = Some((resource_cache, find)); + rewrites::Tracker::>::new(rewrites) + }, + ); + + let (mut lhs_storage, mut rhs_storage) = (lhs_iter.next(), rhs_iter.next()); + loop { + match (lhs_storage, rhs_storage) { + (Some(lhs), Some(rhs)) => { + match emit_unmerged_ignore_intent_to_add(rhs, &mut cb)? { + None => {} + Some(Action::Cancel) => return Ok(None), + Some(Action::Continue) => { + rhs_storage = rhs_iter.next(); + continue; + } + }; + + let (lhs_idx, lhs_path, lhs_entry) = lhs; + let (rhs_idx, rhs_path, rhs_entry) = rhs; + match lhs_path.cmp(rhs_path) { + Ordering::Less => match emit_deletion(lhs, &mut cb, tracker.as_mut())? { + Action::Continue => { + lhs_storage = lhs_iter.next(); + } + Action::Cancel => return Ok(None), + }, + Ordering::Equal => { + if lhs_entry.id != rhs_entry.id || lhs_entry.mode != rhs_entry.mode { + let change = ChangeRef::Modification { + location: Cow::Borrowed(rhs_path), + previous_index: lhs_idx, + previous_entry_mode: lhs_entry.mode, + previous_id: Cow::Borrowed(lhs_entry.id.as_ref()), + index: rhs_idx, + entry_mode: rhs_entry.mode, + id: Cow::Borrowed(rhs_entry.id.as_ref()), + }; + + let change = match tracker.as_mut() { + None => Some(change), + Some(tracker) => tracker.try_push_change(change, rhs_path), + }; + if let Some(change) = change { + match cb(change).map_err(|err| Error::Callback(err.into()))? 
{ + Action::Continue => {} + Action::Cancel => return Ok(None), + } + } + } + lhs_storage = lhs_iter.next(); + rhs_storage = rhs_iter.next(); + } + Ordering::Greater => match emit_addition(rhs, &mut cb, tracker.as_mut())? { + Action::Continue => { + rhs_storage = rhs_iter.next(); + } + Action::Cancel => return Ok(None), + }, + } + } + (Some(lhs), None) => match emit_deletion(lhs, &mut cb, tracker.as_mut())? { + Action::Cancel => return Ok(None), + Action::Continue => { + lhs_storage = lhs_iter.next(); + } + }, + (None, Some(rhs)) => match emit_addition(rhs, &mut cb, tracker.as_mut())? { + Action::Cancel => return Ok(None), + Action::Continue => { + rhs_storage = rhs_iter.next(); + } + }, + (None, None) => break, + } + } + + if let Some((mut tracker, (resource_cache, find))) = tracker.zip(resource_cache_storage) { + let mut cb_err = None; + let out = tracker.emit( + |dst, src| { + let change = if let Some(src) = src { + let (lhs_path, lhs_index, lhs_mode, lhs_id) = src.change.fields(); + let (rhs_path, rhs_index, rhs_mode, rhs_id) = dst.change.fields(); + ChangeRef::Rewrite { + source_location: Cow::Owned(lhs_path.into()), + source_index: lhs_index, + source_entry_mode: lhs_mode, + source_id: Cow::Owned(lhs_id.into()), + location: Cow::Owned(rhs_path.into()), + index: rhs_index, + entry_mode: rhs_mode, + id: Cow::Owned(rhs_id.into()), + copy: match src.kind { + rewrites::tracker::visit::SourceKind::Rename => false, + rewrites::tracker::visit::SourceKind::Copy => true, + }, + } + } else { + dst.change + }; + match cb(change) { + Ok(Action::Continue) => crate::tree::visit::Action::Continue, + Ok(Action::Cancel) => crate::tree::visit::Action::Cancel, + Err(err) => { + cb_err = Some(Error::Callback(err.into())); + crate::tree::visit::Action::Cancel + } + } + }, + resource_cache, + find, + |push| { + for (index, entry) in lhs.entries().iter().enumerate() { + let path = entry.path(lhs); /* `entry` belongs to `lhs`, so its path must be read from `lhs`, not `rhs` */ + push( + ChangeRef::Modification { + location: Cow::Borrowed(path), + previous_index: 
0, /* does not matter */ + previous_entry_mode: entry.mode, + previous_id: Cow::Owned(entry.id.kind().null()), + index, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }, + path, + ); + } + Ok::<_, std::convert::Infallible>(()) + }, + )?; + + if let Some(err) = cb_err { + Err(err) + } else { + Ok(Some(out)) + } + } else { + Ok(None) + } +} + +fn emit_deletion<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'lhs BStr, &'lhs gix_index::Entry), + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + tracker: Option<&mut rewrites::Tracker>>, +) -> Result +where + E: Into>, +{ + let change = ChangeRef::Deletion { + location: Cow::Borrowed(path), + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }; + + let change = match tracker { + None => change, + Some(tracker) => match tracker.try_push_change(change, path) { + Some(change) => change, + None => return Ok(Action::Continue), + }, + }; + + cb(change).map_err(|err| Error::Callback(err.into())) +} + +fn emit_addition<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'rhs BStr, &'rhs gix_index::Entry), + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + tracker: Option<&mut rewrites::Tracker>>, +) -> Result +where + E: Into>, +{ + if let Some(action) = emit_unmerged_ignore_intent_to_add((idx, path, entry), &mut cb)? 
{ + return Ok(action); + } + + let change = ChangeRef::Addition { + location: Cow::Borrowed(path), + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }; + + let change = match tracker { + None => change, + Some(tracker) => match tracker.try_push_change(change, path) { + Some(change) => change, + None => return Ok(Action::Continue), + }, + }; + + cb(change).map_err(|err| Error::Callback(err.into())) +} + +fn emit_unmerged_ignore_intent_to_add<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'rhs BStr, &'rhs gix_index::Entry), + cb: &mut impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, +) -> Result, Error> +where + E: Into>, +{ + if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { + return Ok(Some(Action::Continue)); + } + let stage = entry.stage(); + if stage == gix_index::entry::Stage::Unconflicted { + return Ok(None); + } + + Ok(Some( + cb(ChangeRef::Unmerged { + location: Cow::Borrowed(path), + stage, + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }) + .map_err(|err| Error::Callback(err.into()))?, + )) +} diff --git a/gix-diff/src/index/mod.rs b/gix-diff/src/index/mod.rs new file mode 100644 index 00000000000..0a66b3ca1e7 --- /dev/null +++ b/gix-diff/src/index/mod.rs @@ -0,0 +1,141 @@ +use bstr::BStr; +use std::borrow::Cow; + +/// The error returned by [`index()`](crate::index()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Cannot diff indices that contain sparse entries")] + IsSparse, + #[error("Unmerged entries aren't allowed in the left-hand index, only in the right-hand index")] + LhsHasUnmerged, + #[error("The callback indicated failure")] + Callback(#[source] Box), + #[error("Failure during rename tracking")] + RenameTracking(#[from] crate::rewrites::tracker::emit::Error), +} + +/// What to do after a [ChangeRef] was passed to the callback of [`index()`](crate::index()). 
+#[derive(Default, Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)] +pub enum Action { + /// Continue the operation. + #[default] + Continue, + /// Stop the operation immediately. + /// + /// This is useful if one just wants to determine if something changed or not. + Cancel, +} + +/// Options to configure how rewrites are tracked as part of the [`index()`](crate::index()) call. +pub struct RewriteOptions<'a, Find> +where + Find: gix_object::FindObjectOrHeader, +{ + /// The cache to be used when rename-tracking by similarity is enabled, typically the default. + /// Note that it's recommended to call [`clear_resource_cache()`](`crate::blob::Platform::clear_resource_cache()`) + /// between the calls to avoid runaway memory usage, as the cache isn't limited. + pub resource_cache: &'a mut crate::blob::Platform, + /// A way to look up objects from the object database, for use in similarity checks. + pub find: &'a Find, + /// Configure how rewrites are tracked. + pub rewrites: crate::Rewrites, +} + +/// Identify a change that would have to be applied to `lhs` to obtain `rhs`, as provided in [`index()`](crate::index()). +/// +/// Note that all variants are unconflicted entries, unless it's the [`Self::Unmerged`] one. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ChangeRef<'lhs, 'rhs> { + /// An entry was added to `rhs`. + Addition { + /// The location of the newly added entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was removed from `rhs`. + Deletion { + /// The location of the entry that doesn't exist in `rhs`. + location: Cow<'lhs, BStr>, + /// The index into the entries array of `lhs` for full access. + index: usize, + /// The mode of the entry in `lhs`. 
+ entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `lhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was modified, i.e. has changed its content or its mode. + Modification { + /// The location of the modified entry both in `lhs` and `rhs`. + location: Cow<'rhs, BStr>, + /// The index into the entries array of `lhs` for full access. + previous_index: usize, + /// The previous mode of the entry, in `lhs`. + previous_entry_mode: gix_index::entry::Mode, + /// The previous object id of the entry, in `lhs`. + previous_id: Cow<'lhs, gix_hash::oid>, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was renamed or copied from `lhs` to `rhs`. + /// + /// A rename is effectively fusing together the `Deletion` of the source and the `Addition` of the destination. + Rewrite { + /// The location of the source of the rename or copy operation, in `lhs`. + source_location: Cow<'lhs, BStr>, + /// The index of the entry before the rename, into the entries array of `rhs` for full access. + source_index: usize, + /// The mode of the entry before the rewrite, in `lhs`. + source_entry_mode: gix_index::entry::Mode, + /// The object id of the entry before the rewrite. + /// + /// Note that this is the same as `id` if we require the [similarity to be 100%](super::Rewrites::percentage), but may + /// be different otherwise. + source_id: Cow<'lhs, gix_hash::oid>, + + /// The current location of the entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The index of the entry after the rename, into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry after the rename in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry after the rename in `rhs`. 
+ id: Cow<'rhs, gix_hash::oid>, + + /// If true, this rewrite is created by copy, and `source_id` is pointing to its source. Otherwise, it's a rename, + /// and `source_id` points to a deleted object, as renames are tracked as deletions and additions of the same + /// or similar content. + copy: bool, + }, + /// One of up to three unmerged entries that are provided in order, one for each stage, ordered + /// by `location` and `stage`. + /// + /// Unmerged entries also don't participate in rename tracking, and they are never present in `lhs`. + Unmerged { + /// The current location of the entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The stage of the entry, either *base*, *ours*, or *theirs*. + stage: gix_index::entry::Stage, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, +} + +/// The fully-owned version of [`ChangeRef`]. +pub type Change = ChangeRef<'static, 'static>; + +mod change; +pub(super) mod function; diff --git a/gix-diff/src/lib.rs b/gix-diff/src/lib.rs index ce2451176f5..f438ee4c275 100644 --- a/gix-diff/src/lib.rs +++ b/gix-diff/src/lib.rs @@ -58,6 +58,12 @@ pub mod tree_with_rewrites; #[cfg(feature = "blob")] pub use tree_with_rewrites::function::diff as tree_with_rewrites; +/// +#[cfg(feature = "index")] +pub mod index; +#[cfg(feature = "index")] +pub use index::function::diff as index; + /// #[cfg(feature = "blob")] pub mod blob; diff --git a/gix-diff/tests/Cargo.toml b/gix-diff/tests/Cargo.toml index 9197a86be5b..35645e05d90 100644 --- a/gix-diff/tests/Cargo.toml +++ b/gix-diff/tests/Cargo.toml @@ -17,8 +17,9 @@ name = "diff" path = "diff/main.rs" [dev-dependencies] -insta = "1.40.0" gix-diff = { path = ".." 
} +gix-index = { version = "^0.37.0", path = "../../gix-index" } +gix-pathspec = { version = "^0.8.1", path = "../../gix-pathspec" } gix-hash = { path = "../../gix-hash" } gix-fs = { path = "../../gix-fs" } gix-worktree = { path = "../../gix-worktree" } @@ -27,5 +28,7 @@ gix-odb = { path = "../../gix-odb" } gix-filter = { path = "../../gix-filter" } gix-traverse = { path = "../../gix-traverse" } gix-testtools = { path = "../../tests/tools" } + +insta = "1.40.0" shell-words = "1" pretty_assertions = "1.4.0" diff --git a/gix-diff/tests/diff/index.rs b/gix-diff/tests/diff/index.rs new file mode 100644 index 00000000000..3aceb637e9d --- /dev/null +++ b/gix-diff/tests/diff/index.rs @@ -0,0 +1,1367 @@ +use gix_diff::index::Change; +use gix_diff::rewrites::{Copies, CopySource}; +use gix_diff::Rewrites; +use gix_object::bstr::BStr; + +#[test] +fn empty_to_new_tree_without_rename_tracking() -> crate::Result { + let changes = collect_changes_no_renames(None, "c1 - initial").expect("really just an addition - nothing to track"); + insta::assert_debug_snapshot!(changes, @r#" + [ + Addition { + location: "a", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "b", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "d", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "dir/c", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + { + let (lhs, rhs, _cache, _odb, mut pathspec) = repo_with_indices(None, "c1 - initial", None)?; + let err = gix_diff::index( + &lhs, + &rhs, + |_change| Err(std::io::Error::new(std::io::ErrorKind::Other, "custom error")), + None::>, + &mut pathspec, + &mut |_, _, _, _| true, + ) + .unwrap_err(); + assert_eq!( + format!("{err:?}"), + r#"Callback(Custom { 
kind: Other, error: "custom error" })"#, + "custom errors made visible and not squelched" + ); + } + Ok(()) +} + +#[test] +fn changes_against_modified_tree_with_filename_tracking() -> crate::Result { + let changes = collect_changes_no_renames("c2", "c3-modification")?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "a", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(78981922613b2afb6025042ff6bd878ac1994e85), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(b4f17b61de71d9b2e54ac9e62b1629ae2d97a6a7), + }, + Modification { + location: "dir/c", + previous_index: 3, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(6695780ceb14b05e076a99bbd2babf34723b3464), + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + }, + ] + "#); + Ok(()) +} + +#[test] +fn renames_by_identity() -> crate::Result { + for (from, to, expected, assert_msg, track_empty) in [ + ( + "c3-modification", + "r1-identity", + vec![BStr::new("a"), "dir/a-moved".into()], + "one rename and nothing else", + false, + ), + ( + "c4 - add identical files", + "r2-ambiguous", + vec![ + "s1".into(), + "b1".into(), + "s2".into(), + "b2".into(), + "s3".into(), + "z".into(), + ], + "multiple possible sources decide by ordering everything lexicographically", + true, + ), + ( + "c4 - add identical files", + "r2-ambiguous", + vec![], + "nothing is tracked with `track_empty = false`", + false, + ), + ( + "c5 - add links", + "r4-symlinks", + vec!["link-1".into(), "renamed-link-1".into()], + "symlinks are only tracked by identity", + false, + ), + ( + "r1-identity", + "c4 - add identical files", + vec![], + "not having any renames is OK as well", + false, + ), + ( + "tc1-identity", + "tc1-identity", + vec![], + "copy tracking is off by default", + false, + ), + ] { + for percentage in [None, Some(0.5)] { + let (changes, out) = collect_changes_opts( + from, + to, + Some(Rewrites { + percentage, 
+ track_empty, + ..Default::default() + }), + )?; + let actual: Vec<_> = changes + .into_iter() + .flat_map(|c| match c { + Change::Rewrite { + source_location, + location, + copy, + .. + } => { + assert!(!copy); + vec![source_location, location] + } + _ => vec![], + }) + .collect(); + + assert_eq!(actual, expected, "{assert_msg}"); + #[cfg(not(windows))] + assert_eq!( + out.expect("present as rewrites are configured").num_similarity_checks, + 0, + "there are no fuzzy checks in if everything was resolved by identity only" + ); + } + } + Ok(()) +} + +#[test] +fn rename_by_similarity() -> crate::Result { + insta::allow_duplicates! { + for percentage in [ + None, + Some(0.76), /*cutoff point where git stops seeing it as equal */ + ] { + let (changes, out) = collect_changes_opts( + "r2-ambiguous", + "r3-simple", + Some(Rewrites { + percentage, + ..Default::default() + }), + ).expect("errors can only happen with IO or ODB access fails"); + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(61780798228d17af2d34fce4cfbdf35556832472), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + }, + Deletion { + location: "dir/c", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + }, + Addition { + location: "dir/c-moved", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f01e8ddf5adc56985b9a1cda6d7c7ef9e3abe034), + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, if percentage.is_some() { 1 } else { 0 }); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + } + } + + let (changes, out) = collect_changes_opts( + "r2-ambiguous", + "r3-simple", + Some(Rewrites { + percentage: Some(0.6), + limit: 1, // has 
no effect as it's just one item here. + ..Default::default() + }), + ) + .expect("it found all items at the cut-off point, similar to git"); + + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(61780798228d17af2d34fce4cfbdf35556832472), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + }, + Rewrite { + source_location: "dir/c", + source_index: 5, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + location: "dir/c-moved", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f01e8ddf5adc56985b9a1cda6d7c7ef9e3abe034), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 1); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + Ok(()) +} + +#[test] +fn renames_by_similarity_with_limit() -> crate::Result { + let (changes, out) = collect_changes_opts( + "c6", + "r5", + Some(Rewrites { + limit: 1, // prevent fuzzy tracking from happening + ..Default::default() + }), + )?; + assert_eq!( + changes.iter().filter(|c| matches!(c, Change::Rewrite { .. 
})).count(), + 0, + "fuzzy tracking is effectively disabled due to limit" + ); + let actual: Vec<_> = changes.iter().map(|c| c.fields().0).collect(); + assert_eq!(actual, ["f1", "f1-renamed", "f2", "f2-renamed"],); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 4); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_by_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "c7", + "tc1-identity", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFiles, + percentage: None, + }), + limit: 1, // the limit isn't actually used for identity based checks + ..Default::default() + }), + )?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "c1", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "c2", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "dir/c3", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + 
assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_by_similarity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc1-identity", + "tc2-similarity", + Some(Rewrites { + copies: Some(Copies::default()), + ..Default::default() + }), + )?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c4", + index: 6, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c5", + index: 7, + entry_mode: Mode( + FILE, + ), + id: Sha1(08fe19ca4d2f79624f35333157d610811efc1aed), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "dir/c6", + index: 12, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 2, + "two are similar, the other one is identical" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_in_entire_tree_by_similarity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies::default()), + ..Default::default() + }), + )?; + assert_eq!( + changes.iter().filter(|c| matches!(c, Change::Rewrite { .. 
})).count(), + 0, + "needs --find-copies-harder to detect rewrites here" + ); + let actual: Vec<_> = changes.iter().map(|c| c.fields().0).collect(); + assert_eq!(actual, ["b", "c6", "c7", "newly-added"]); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 3, + "it does have some candidates, probably for rename tracking" + ); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFilesAndAllSources, + ..Default::default() + }), + ..Default::default() + }), + )?; + + // As the full-tree traversal order is different, it sees candidates in different order. + // Let's keep this as expectations, as in future there might be a candidate-based search that considers filenames + // or similarity in names. 
+ insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c6", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "r/c3di", + source_index: 12, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + location: "c7", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + Rewrite { + source_location: "c5", + source_index: 7, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(08fe19ca4d2f79624f35333157d610811efc1aed), + location: "newly-added", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(97b3d1a5707f8a11fa5fa8bc6c3bd7b3965601fd), + copy: true, + }, + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(f198d0640214092732566fb00543163845c8252c), + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 4); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_in_entire_tree_by_similarity_with_limit() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFilesAndAllSources, + ..Default::default() + }), + limit: 2, // similarity checks can't be made that way + track_empty: false, + ..Default::default() + }), + )?; + + // Again, it finds a different first match for the rewrite 
compared to tree-traversal, expected for now. + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c6", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "r/c3di", + source_index: 12, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + location: "c7", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(f198d0640214092732566fb00543163845c8252c), + }, + Addition { + location: "newly-added", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(97b3d1a5707f8a11fa5fa8bc6c3bd7b3965601fd), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0, "similarity checks can't run"); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 21); + + Ok(()) +} + +#[test] +fn realistic_renames_by_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r1-base", + "r1-change", + Some(Rewrites { + copies: Some(Copies::default()), + limit: 1, + track_empty: true, + ..Default::default() + }), + )?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Rewrite { + source_location: "git-index/src/file.rs", + source_index: 18, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "git-index/src/file/mod.rs",
+ index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + Addition { + location: "git-index/tests/index/file/access.rs", + index: 45, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Modification { + location: "git-index/tests/index/file/mod.rs", + previous_index: 45, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + index: 46, + entry_mode: Mode( + FILE, + ), + id: Sha1(8ba3a16384aacc37d01564b28401755ce8053f51), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 1); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_disabled() -> crate::Result { + let changes = collect_changes_no_renames("r1-base", "r1-change")?; + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Deletion { + location: "git-index/src/file.rs", + index: 18, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "git-index/src/file/mod.rs", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "git-index/tests/index/file/access.rs", + index: 45, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Modification { + location: "git-index/tests/index/file/mod.rs", + previous_index: 45, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + index: 46, + entry_mode: Mode( + FILE, + ), + id: Sha1(8ba3a16384aacc37d01564b28401755ce8053f51), + }, + ] + "#); + Ok(()) +} + +#[test] +fn realistic_renames_disabled_3() -> crate::Result { + let changes =
collect_changes_no_renames("r3-base", "r3-change")?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Addition { + location: "src/ein.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "src/gix.rs", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "src/plumbing-cli.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "src/porcelain-cli.rs", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + Ok(()) +} + +#[test] +fn realistic_renames_by_identity_3() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r3-base", + "r3-change", + Some(Rewrites { + copies: Some(Copies::default()), + limit: 1, + track_empty: true, + ..Default::default() + }), + )?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Rewrite { + source_location: "src/plumbing-cli.rs", + source_index: 0, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "src/ein.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + Rewrite { + source_location: "src/porcelain-cli.rs", + source_index: 4, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "src/gix.rs", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); +
assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_2() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r2-base", + "r2-change", + Some(Rewrites { + copies: Some(Copies::default()), + track_empty: false, + ..Default::default() + }), + )?; + + // We cannot capture renames if track-empty is disabled, as these are actually empty, + // and we can't take directory-shortcuts here (i.e. tracking knows no directories here + // as is the case with trees where we traverse breadth-first). + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Deletion { + location: "git-sec/CHANGELOG.md", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/Cargo.toml", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/identity.rs", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/lib.rs", + index: 6, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/permission.rs", + index: 7, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/trust.rs", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/tests/identity/mod.rs", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/tests/sec.rs", + index: 10, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/CHANGELOG.md", + index: 231, + entry_mode: Mode( + FILE, + ), + id:
Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/Cargo.toml", + index: 232, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/identity.rs", + index: 233, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/lib.rs", + index: 234, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/permission.rs", + index: 235, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/trust.rs", + index: 236, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/tests/identity/mod.rs", + index: 237, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/tests/sec.rs", + index: 238, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_3_without_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + )?; + + // We don't actually track directory renames, only files show up. 
+ insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Rewrite { + source_location: "src/plumbing/options.rs", + source_index: 4, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + location: "src/plumbing-renamed/options/mod.rs", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/mod.rs", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + location: "src/plumbing-renamed/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/main.rs", + source_index: 2, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + location: "src/plumbing-renamed/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + let (changes, _out) = collect_changes_opts_with_pathspec( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + Some("src/plumbing/m*"), + )?; + + // Pathspecs are applied in advance, which affects rename tracking.
+ insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Deletion { + location: "src/plumbing/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Deletion { + location: "src/plumbing/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + }, + ] + "#); + + let (changes, _out) = collect_changes_opts_with_pathspec( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + Some("src/plumbing-renamed/m*"), + )?; + // One can also get the other side of the rename + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Addition { + location: "src/plumbing-renamed/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Addition { + location: "src/plumbing-renamed/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + }, + ] + "#); + + Ok(()) +} + +#[test] +fn unmerged_entries_and_intent_to_add() -> crate::Result { + let (changes, _out) = collect_changes_opts( + "r4-dir-rename-non-identity", + ".git/index", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + )?; + + // each unmerged entry is emitted separately, with rename tracking…, and no entry is emitted for + // paths that are mentioned there. + // Intent-to-add is transparent.
+ insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Base, + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Ours, + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + ] + "#); + + let changes = collect_changes_no_renames("r4-dir-rename-non-identity", ".git/index")?; + // …or without + insta::assert_debug_snapshot!(changes.into_iter().collect::<Vec<_>>(), @r#" + [ + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Base, + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Ours, + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + ] + "#); + + let (index, _, _, _, _) = repo_with_indices(".git/index", ".git/index", None)?; + assert_eq!( + index.entry_by_path("will-add".into()).map(|e| e.id), + Some(hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")), + "the file is there, but we don't see it" + ); + + Ok(()) +} + +mod util { + use gix_diff::rewrites; + use std::convert::Infallible; + use std::path::{Path, PathBuf}; + + fn repo_workdir() -> crate::Result<PathBuf> { + gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_rewrites_repo.sh") + } + + pub fn repo_with_indices( + lhs: impl Into<Option<&'static str>>, + rhs: impl Into<Option<&'static str>>, + patterns: impl IntoIterator<Item = &'static str>, + ) -> gix_testtools::Result<( + gix_index::State, + gix_index::State, + gix_diff::blob::Platform, + gix_odb::Handle, + gix_pathspec::Search, + )> { + let root = repo_workdir()?; + let odb = gix_odb::at(root.join(".git/objects"))?; + let lhs = read_index(&odb, &root, lhs.into())?; + let rhs = read_index(&odb, &root, rhs.into())?; + + let cache = gix_diff::blob::Platform::new( +
Default::default(), + gix_diff::blob::Pipeline::new(Default::default(), Default::default(), Vec::new(), Default::default()), + Default::default(), + gix_worktree::Stack::new( + &root, + gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::default()), + Default::default(), + Vec::new(), + Vec::new(), + ), + ); + let pathspecs = gix_pathspec::Search::from_specs( + patterns + .into_iter() + .map(|p| gix_pathspec::Pattern::from_bytes(p.as_bytes(), Default::default()).expect("valid pattern")), + None, + &root, + )?; + Ok((lhs, rhs, cache, odb, pathspecs)) + } + + pub fn collect_changes_no_renames( + lhs: impl Into<Option<&'static str>>, + rhs: impl Into<Option<&'static str>>, + ) -> gix_testtools::Result<Vec<gix_diff::index::Change>> { + Ok(collect_changes_opts(lhs, rhs, None)?.0) + } + + pub fn collect_changes_opts( + lhs: impl Into<Option<&'static str>>, + rhs: impl Into<Option<&'static str>>, + options: Option<gix_diff::Rewrites>, + ) -> gix_testtools::Result<(Vec<gix_diff::index::Change>, Option<rewrites::Outcome>)> { + collect_changes_opts_with_pathspec(lhs, rhs, options, None) + } + + pub fn collect_changes_opts_with_pathspec( + lhs: impl Into<Option<&'static str>>, + rhs: impl Into<Option<&'static str>>, + options: Option<gix_diff::Rewrites>, + patterns: impl IntoIterator<Item = &'static str>, + ) -> gix_testtools::Result<(Vec<gix_diff::index::Change>, Option<rewrites::Outcome>)> { + let (from, to, mut cache, odb, mut pathspecs) = repo_with_indices(lhs, rhs, patterns)?; + let mut out = Vec::new(); + let rewrites_info = gix_diff::index( + &from, + &to, + |change| -> Result<_, Infallible> { + out.push(change.into_owned()); + Ok(gix_diff::index::Action::Continue) + }, + options.map(|rewrites| gix_diff::index::RewriteOptions { + rewrites, + resource_cache: &mut cache, + find: &odb, + }), + &mut pathspecs, + &mut |_, _, _, _| false, + )?; + Ok((out, rewrites_info)) + } + + fn read_index( + odb: impl gix_object::Find, + root: &Path, + tree: Option<&str>, + ) -> gix_testtools::Result<gix_index::State> { + let Some(tree) = tree else { + return Ok(gix_index::State::new(gix_hash::Kind::Sha1)); + }; + if tree == ".git/index" { + Ok(gix_index::File::at(root.join(tree), gix_hash::Kind::Sha1, false, Default::default())?.into()) + } else { + let tree_id_path =
root.join(tree).with_extension("tree"); + let hex_id = std::fs::read_to_string(&tree_id_path).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Could not read '{}': {}", tree_id_path.display(), err), + ) + })?; + let tree_id = gix_hash::ObjectId::from_hex(hex_id.trim().as_bytes())?; + Ok(gix_index::State::from_tree(&tree_id, odb, Default::default())?) + } + } +} +use crate::hex_to_id; +use util::{collect_changes_no_renames, collect_changes_opts, collect_changes_opts_with_pathspec, repo_with_indices}; diff --git a/gix-diff/tests/diff/main.rs b/gix-diff/tests/diff/main.rs index 2163b5d3b01..a6be530ffdb 100644 --- a/gix-diff/tests/diff/main.rs +++ b/gix-diff/tests/diff/main.rs @@ -5,6 +5,7 @@ fn hex_to_id(hex: &str) -> gix_hash::ObjectId { } mod blob; +mod index; mod rewrites; mod tree; mod tree_with_rewrites; diff --git a/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar b/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar index 878a4113ef799f5cc50be490602134493ac692e6..b36453ef90ea0a352495478bdb8b1366b24a9bfb 100644 GIT binary patch delta 2134 zcmbVOYfuwc6wXaHfdnxD5zs+reL@gr@9rj>Fcv{&c-Y98qEi&Lo81khJkklMm0HA6 z>kp?@vXuBh3O;b!QeQ!%)M~9`AL6Sm(<0Te^+yLC>tLM$i-Nsu5=7e{w)t~*^4)vS zch7$3J6qi`w)%AR5lrR9LN^ee%|C!h6$znHMu;$n-v=gvaG}(>TEI(LN7*b_xfewJ z3EWvqjU~gHnXxh}BYnQuG@zqeDkBw;89un4qw zsv<2sO?;}Hp?OvWkw}f5krs-});LN^X0kjVEKQy@BLh7b%4R|wYRX;Q&;zLv9qE)( z`b%t0$D<}^PRQ=H)Gk@~?IDa!ckUO6OoN{A{Hekt7wuT|F=+9YxZegZ4fnGVPVgmK3_T=iyKEH6+(hGLV_en z3MrC+29l=y2}J;&x^L*k!`;iq*<5THSCoR!a|vvPz~gq8vlLIOf*OSC6NHzPubAFF z`}r;rLDYBd4z;MhTREOF{*w))sS#|B-8dKS&PfmtbuZT!30$#rRYeMpnhurB&uFOk z@CqkB;tA@f`xYhLzp_z<(=JM`3lWj2D*5J8Vl4lM2`NCo{$K7mi>ILgFO0s$$G zF>|b7=Z$uPw9~Z7%yNXuPO!)nY&8+HB>#9FOI9&-P#8Y2RiOqR%~H%9eXf_Qeca_~ zUs#&@V9M`podsnJ?w7`R@|R*sn6q}#dZfNJppFOHE!q{pr(GcjuIZG~z?cY*F&K?J zqzE%dnn;>tZ3ZZiHo+_~CgeddnEN(>zMVX9g6YOL2q=?pzP*=MU8kZiXg6)Un%~EK 
zbZ6@~EsBAHzBLM2Z=!Z3%6Juc1}@7ZecEgC!QbQp9q>V4QIh-s&6R#7g1B?u>J7)=wWjnw3yVcs6ameE-|5OH@l+&iMnybzk+Yb^W-*{UAYn=xuCyr$LWJ z?ie*`T3=PcuEEZpUPXiTaN)g+*1nX-vDdw@vUthI#|zHJt)d4OoM#4)>ht@LebO{b z+nso@{m{*hu=Z`4%Y99)pajS{c6b2vC`jL~V~X;2ImHiGqZy!TP<*l-8d#cPdBMyY zIo3vTCcDYN@eDMYdBR4S37ZYQs9-f3r9yJ z^qUiJqSyEtsH?`b!*o|5y1wvg?4biqIhB9Pi8@nW4X%_#N?;Q`?-@aa3Q`b0bCekt z!Br^WenY|INAyL!Ml%JU7jVJy6wMU1J2DPUyi-GCRGn+h&J0e`B~k*Dl2il^_JV9o z5ty`=-1!*+l421?C~3mr&+Y{b1~lo5^Kw^8Y=;*mqD2(*jN;w|dGlRtZfMZ-(ZT}1 zV@`;L4dH8W%|m~5z&vYC;eg>f@0TZ4gwu`z?8 zfr*)+fsuicu>pgDfuWJ1p$SCUVn%bu$&4zJlMj4!*zBORjB%0|>tt5DIu=87Q-jUz zb`6ZOObiSRoaxdV+O%hBZG_Q`3=G0zFVj3H`#7mi_H+;t2gx7p)&t7}X{fvxSbnC1 zupLOkq=9)FjD{-kVPI%n!oa}z6=;bF5Hnq1nfSLZdfP9Jqv8v88|~Q27ChINL1vPG zk_1HO{XiHEHNkgsi=!vVdXvV!*-<-YcCTqJWh)64c z%3ad2hegx2V&=will-add +git add --intent-to-add will-add + +# a file with skip-worktree flag, which has no bearing on tree/index diffs. +git update-index --skip-worktree src/shared.rs +rm src/shared.rs + mv ../*.tree . \ No newline at end of file