diff --git a/Cargo.lock b/Cargo.lock index 365fb927fa5..fbf3e71b887 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1715,12 +1715,15 @@ dependencies = [ "bstr", "document-features", "getrandom", + "gix-attributes 0.23.1", "gix-command", "gix-filter", "gix-fs 0.12.1", "gix-hash 0.15.1", + "gix-index 0.37.0", "gix-object 0.46.1", "gix-path 0.10.13", + "gix-pathspec", "gix-tempfile 15.0.0", "gix-trace 0.1.11", "gix-traverse 0.43.1", @@ -1738,8 +1741,10 @@ dependencies = [ "gix-filter", "gix-fs 0.12.1", "gix-hash 0.15.1", + "gix-index 0.37.0", "gix-object 0.46.1", "gix-odb", + "gix-pathspec", "gix-testtools", "gix-traverse 0.43.1", "gix-worktree 0.38.0", @@ -2808,6 +2813,7 @@ dependencies = [ "gix-odb", "gix-testtools", "gix-traverse 0.43.1", + "insta", ] [[package]] diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs index 99cbde7ed24..75bf33f9343 100644 --- a/gitoxide-core/src/repository/status.rs +++ b/gitoxide-core/src/repository/status.rs @@ -1,6 +1,6 @@ use anyhow::bail; use gix::bstr::{BStr, BString, ByteSlice}; -use gix::status::index_worktree::iter::Item; +use gix::status::{self, index_worktree}; use gix_status::index_as_worktree::{Change, Conflict, EntryStatus}; use std::path::Path; @@ -109,21 +109,54 @@ pub fn show( } None => gix::status::Submodule::AsConfigured { check_dirty: false }, }) - .into_index_worktree_iter(pathspecs)?; + .into_iter(pathspecs)?; for item in iter.by_ref() { let item = item?; match item { - Item::Modification { + status::Item::TreeIndex(change) => { + let (location, _, _, _) = change.fields(); + let status = match change { + gix::diff::index::Change::Addition { .. } => "A", + gix::diff::index::Change::Deletion { .. } => "D", + gix::diff::index::Change::Modification { .. } => "M", + gix::diff::index::Change::Rewrite { + ref source_location, .. 
+ } => { + let source_location = gix::path::from_bstr(source_location.as_ref()); + let source_location = gix::path::relativize_with_prefix(&source_location, prefix); + writeln!( + out, + "{status: >2} {source_rela_path} → {dest_rela_path}", + status = "R", + source_rela_path = source_location.display(), + dest_rela_path = + gix::path::relativize_with_prefix(&gix::path::from_bstr(location), prefix).display(), + )?; + continue; + } + gix::diff::index::Change::Unmerged { .. } => { + // Unmerged entries from the worktree-index are displayed as part of the index-worktree comparison. + // Here we have nothing to do with them and can ignore. + continue; + } + }; + writeln!( + out, + "{status: >2} {rela_path}", + rela_path = gix::path::relativize_with_prefix(&gix::path::from_bstr(location), prefix).display(), + )?; + } + status::Item::IndexWorktree(index_worktree::Item::Modification { entry: _, entry_index: _, rela_path, status, - } => print_index_entry_status(&mut out, prefix, rela_path.as_ref(), status)?, - Item::DirectoryContents { + }) => print_index_entry_status(&mut out, prefix, rela_path.as_ref(), status)?, + status::Item::IndexWorktree(index_worktree::Item::DirectoryContents { entry, collapsed_directory_status, - } => { + }) => { if collapsed_directory_status.is_none() { writeln!( out, @@ -139,12 +172,12 @@ pub fn show( )?; } } - Item::Rewrite { + status::Item::IndexWorktree(index_worktree::Item::Rewrite { source, dirwalk_entry, copy: _, // TODO: how to visualize copies? .. - } => { + }) => { // TODO: handle multi-status characters, there can also be modifications at the same time as determined by their ID and potentially diffstats. 
writeln!( out, @@ -175,9 +208,8 @@ pub fn show( writeln!(err, "{outcome:#?}", outcome = out.index_worktree).ok(); } - writeln!(err, "\nhead -> index isn't implemented yet")?; - progress.init(Some(out.index.entries().len()), gix::progress::count("files")); - progress.set(out.index.entries().len()); + progress.init(Some(out.worktree_index.entries().len()), gix::progress::count("files")); + progress.set(out.worktree_index.entries().len()); progress.show_throughput(start); Ok(()) } diff --git a/gitoxide-core/src/repository/tree.rs b/gitoxide-core/src/repository/tree.rs index 04881317737..fcde83f8fb1 100644 --- a/gitoxide-core/src/repository/tree.rs +++ b/gitoxide-core/src/repository/tree.rs @@ -1,18 +1,17 @@ -use std::{borrow::Cow, io}; - use anyhow::bail; use gix::Tree; +use std::io::BufWriter; +use std::{borrow::Cow, io}; use crate::OutputFormat; mod entries { - use std::collections::VecDeque; - use gix::{ bstr::{BStr, BString, ByteSlice, ByteVec}, objs::tree::EntryRef, traverse::tree::visit::Action, }; + use std::collections::VecDeque; use crate::repository::tree::format_entry; @@ -58,6 +57,9 @@ mod entries { } fn push_element(&mut self, name: &BStr) { + if name.is_empty() { + return; + } if !self.path.is_empty() { self.path.push(b'/'); } @@ -66,6 +68,10 @@ mod entries { } impl gix::traverse::tree::Visit for Traverse<'_, '_> { + fn pop_back_tracked_path_and_set_current(&mut self) { + self.path = self.path_deque.pop_back().unwrap_or_default(); + } + fn pop_front_tracked_path_and_set_current(&mut self) { self.path = self.path_deque.pop_front().expect("every parent is set only once"); } @@ -91,12 +97,12 @@ mod entries { fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action { let size = self .repo - .and_then(|repo| repo.find_object(entry.oid).map(|o| o.data.len()).ok()); + .and_then(|repo| repo.find_header(entry.oid).map(|h| h.size()).ok()); if let Some(out) = &mut self.out { format_entry(out, entry, self.path.as_bstr(), size).ok(); } if let Some(size) = size { - 
self.stats.num_bytes += size as u64; + self.stats.num_bytes += size; } use gix::object::tree::EntryKind::*; @@ -154,8 +160,9 @@ pub fn entries( let tree = treeish_to_tree(treeish, &repo)?; if recursive { - let mut delegate = entries::Traverse::new(extended.then_some(&repo), Some(&mut out)); - tree.traverse().breadthfirst(&mut delegate)?; + let mut write = BufWriter::new(out); + let mut delegate = entries::Traverse::new(extended.then_some(&repo), Some(&mut write)); + tree.traverse().depthfirst(&mut delegate)?; } else { for entry in tree.iter() { let entry = entry?; @@ -163,9 +170,7 @@ pub fn entries( &mut out, &entry.inner, entry.inner.filename, - extended - .then(|| entry.id().object().map(|o| o.data.len())) - .transpose()?, + extended.then(|| entry.id().header().map(|o| o.size())).transpose()?, )?; } } @@ -182,12 +187,12 @@ fn format_entry( mut out: impl io::Write, entry: &gix::objs::tree::EntryRef<'_>, filename: &gix::bstr::BStr, - size: Option, + size: Option, ) -> std::io::Result<()> { use gix::objs::tree::EntryKind::*; - writeln!( + write!( out, - "{} {}{} {}", + "{} {}{} ", match entry.mode.kind() { Tree => "TREE", Blob => "BLOB", @@ -196,7 +201,8 @@ fn format_entry( Commit => "SUBM", }, entry.oid, - size.map_or_else(|| "".into(), |s| Cow::Owned(format!(" {s}"))), - filename - ) + size.map_or_else(|| "".into(), |s| Cow::Owned(format!(" {s}"))) + )?; + out.write_all(filename)?; + out.write_all(b"\n") } diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 70a48195093..e3de39938c3 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -13,11 +13,13 @@ rust-version = "1.65" autotests = false [features] -default = ["blob"] -## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. +default = ["blob", "index"] +## Enable diffing of blobs using imara-diff. 
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-traverse"] +## Enable diffing of two indices, which also allows for a generic rewrite tracking implementation. +index = ["dep:gix-index", "dep:gix-pathspec", "dep:gix-attributes"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] +serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-index?/serde"] ## Make it possible to compile to the `wasm32-unknown-unknown` target. wasm = ["dep:getrandom"] @@ -25,6 +27,9 @@ wasm = ["dep:getrandom"] doctest = false [dependencies] +gix-index = { version = "^0.37.0", path = "../gix-index", optional = true } +gix-pathspec = { version = "^0.8.1", path = "../gix-pathspec", optional = true } +gix-attributes = { version = "^0.23.1", path = "../gix-attributes", optional = true } gix-hash = { version = "^0.15.1", path = "../gix-hash" } gix-object = { version = "^0.46.1", path = "../gix-object" } gix-filter = { version = "^0.16.0", path = "../gix-filter", optional = true } diff --git a/gix-diff/src/index/change.rs b/gix-diff/src/index/change.rs new file mode 100644 index 00000000000..da5d98d1f4e --- /dev/null +++ b/gix-diff/src/index/change.rs @@ -0,0 +1,197 @@ +use crate::index::{Change, ChangeRef}; +use crate::rewrites; +use crate::rewrites::tracker::ChangeKind; +use crate::tree::visit::Relation; +use bstr::BStr; +use gix_object::tree; +use std::borrow::Cow; + +impl ChangeRef<'_, '_> { + /// Copy everything into an owned version of this instance. 
+ pub fn into_owned(self) -> Change { + match self { + ChangeRef::Addition { + location, + index, + entry_mode, + id, + } => ChangeRef::Addition { + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Deletion { + location, + index, + entry_mode, + id, + } => ChangeRef::Deletion { + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Modification { + location, + previous_index, + previous_entry_mode, + previous_id, + index, + entry_mode, + id, + } => ChangeRef::Modification { + location: Cow::Owned(location.into_owned()), + previous_index, + previous_entry_mode, + previous_id: Cow::Owned(previous_id.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + ChangeRef::Rewrite { + source_location, + source_index, + source_entry_mode, + source_id, + location, + index, + entry_mode, + id, + copy, + } => ChangeRef::Rewrite { + source_location: Cow::Owned(source_location.into_owned()), + source_index, + source_entry_mode, + source_id: Cow::Owned(source_id.into_owned()), + location: Cow::Owned(location.into_owned()), + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + copy, + }, + ChangeRef::Unmerged { + location, + stage, + index, + entry_mode, + id, + } => ChangeRef::Unmerged { + location: Cow::Owned(location.into_owned()), + stage, + index, + entry_mode, + id: Cow::Owned(id.into_owned()), + }, + } + } +} + +impl ChangeRef<'_, '_> { + /// Return all shared fields among all variants: `(location, index, entry_mode, id)` + /// + /// In case of rewrites, the fields return to the current change. + pub fn fields(&self) -> (&BStr, usize, gix_index::entry::Mode, &gix_hash::oid) { + match self { + ChangeRef::Addition { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Deletion { + location, + index, + entry_mode, + id, + .. 
+ } + | ChangeRef::Modification { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Rewrite { + location, + index, + entry_mode, + id, + .. + } + | ChangeRef::Unmerged { + location, + index, + entry_mode, + id, + .. + } => (location.as_ref(), *index, *entry_mode, id), + } + } +} + +impl rewrites::tracker::Change for ChangeRef<'_, '_> { + fn id(&self) -> &gix_hash::oid { + match self { + ChangeRef::Addition { id, .. } | ChangeRef::Deletion { id, .. } | ChangeRef::Modification { id, .. } => { + id.as_ref() + } + ChangeRef::Rewrite { .. } | ChangeRef::Unmerged { .. } => { + unreachable!("BUG") + } + } + } + + fn relation(&self) -> Option { + None + } + + fn kind(&self) -> ChangeKind { + match self { + ChangeRef::Addition { .. } => ChangeKind::Addition, + ChangeRef::Deletion { .. } => ChangeKind::Deletion, + ChangeRef::Modification { .. } => ChangeKind::Modification, + ChangeRef::Rewrite { .. } => { + unreachable!("BUG: rewrites can't be determined ahead of time") + } + ChangeRef::Unmerged { .. } => { + unreachable!("BUG: unmerged don't participate in rename tracking") + } + } + } + + fn entry_mode(&self) -> tree::EntryMode { + match self { + ChangeRef::Addition { entry_mode, .. } + | ChangeRef::Deletion { entry_mode, .. } + | ChangeRef::Modification { entry_mode, .. } + | ChangeRef::Rewrite { entry_mode, .. } + | ChangeRef::Unmerged { entry_mode, .. } => { + entry_mode + .to_tree_entry_mode() + // Default is for the impossible case - just don't let it participate in rename tracking. + .unwrap_or(tree::EntryKind::Tree.into()) + } + } + } + + fn id_and_entry_mode(&self) -> (&gix_hash::oid, tree::EntryMode) { + match self { + ChangeRef::Addition { id, entry_mode, .. } + | ChangeRef::Deletion { id, entry_mode, .. } + | ChangeRef::Modification { id, entry_mode, .. } + | ChangeRef::Rewrite { id, entry_mode, .. } + | ChangeRef::Unmerged { id, entry_mode, .. 
} => { + ( + id, + entry_mode + .to_tree_entry_mode() + // Default is for the impossible case - just don't let it participate in rename tracking. + .unwrap_or(tree::EntryKind::Tree.into()), + ) + } + } + } +} diff --git a/gix-diff/src/index/function.rs b/gix-diff/src/index/function.rs new file mode 100644 index 00000000000..bbf2a9a3140 --- /dev/null +++ b/gix-diff/src/index/function.rs @@ -0,0 +1,324 @@ +use super::{Action, ChangeRef, Error, RewriteOptions}; +use crate::rewrites; +use bstr::{BStr, BString, ByteSlice}; +use gix_filter::attributes::glob::pattern::Case; +use std::borrow::Cow; +use std::cell::RefCell; +use std::cmp::Ordering; + +/// Produce an entry-by-entry diff between `lhs` and `rhs`, sending changes to `cb(change) -> Action` for consumption, +/// which would turn `lhs` into `rhs` if applied. +/// Use `pathspec` to reduce the set of entries to look at, and `pathspec_attributes` may be used by pathspecs that perform +/// attribute lookups. +/// +/// If `cb` indicated that the operation should be cancelled, no error is triggered as this isn't supposed to +/// occur through user-interaction - this diff is typically too fast. +/// +/// Note that rewrites will be emitted at the end, so no ordering can be assumed. They will only be tracked if +/// `rewrite_options` is `Some`. Note that the set of entries participating in rename tracking is affected by `pathspec`. +/// +/// Return the outcome of the rewrite tracker if it was enabled. +/// +/// Note that only `rhs` may contain unmerged entries, as `rhs` is expected to be the index read from `.git/index`. +/// Unmerged entries are always provided as changes, one stage at a time, up to three stages for *base*, *ours* and *theirs*. +/// Conceptually, `rhs` is *ours*, and `lhs` is *theirs*. +/// The entries in `lhs` and `rhs` are both expected to be sorted like index entries are typically sorted. +/// +/// Note that sparse indices aren't supported, they must be "unsparsed" before. 
+pub fn diff<'rhs, 'lhs: 'rhs, E, Find>( + lhs: &'lhs gix_index::State, + rhs: &'rhs gix_index::State, + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + rewrite_options: Option>, + pathspec: &mut gix_pathspec::Search, + pathspec_attributes: &mut dyn FnMut(&BStr, Case, bool, &mut gix_attributes::search::Outcome) -> bool, +) -> Result, Error> +where + E: Into>, + Find: gix_object::FindObjectOrHeader, +{ + if lhs.is_sparse() || rhs.is_sparse() { + return Err(Error::IsSparse); + } + if lhs + .entries() + .iter() + .any(|e| e.stage() != gix_index::entry::Stage::Unconflicted) + { + return Err(Error::LhsHasUnmerged); + } + + let lhs_range = lhs + .prefixed_entries_range(pathspec.common_prefix()) + .unwrap_or_else(|| 0..lhs.entries().len()); + let rhs_range = rhs + .prefixed_entries_range(pathspec.common_prefix()) + .unwrap_or_else(|| 0..rhs.entries().len()); + + let pattern_matches = RefCell::new(|relative_path, entry: &gix_index::Entry| { + pathspec + .pattern_matching_relative_path(relative_path, Some(entry.mode.is_submodule()), pathspec_attributes) + .map_or(false, |m| !m.is_excluded()) + }); + + let (mut lhs_iter, mut rhs_iter) = ( + lhs.entries()[lhs_range.clone()] + .iter() + .enumerate() + .map(|(idx, e)| (idx + lhs_range.start, e.path(lhs), e)) + .filter(|(_, path, e)| pattern_matches.borrow_mut()(path, e)), + rhs.entries()[rhs_range.clone()] + .iter() + .enumerate() + .map(|(idx, e)| (idx + rhs_range.start, e.path(rhs), e)) + .filter(|(_, path, e)| pattern_matches.borrow_mut()(path, e)), + ); + + let mut conflicting_paths = Vec::::new(); + let mut cb = move |change: ChangeRef<'lhs, 'rhs>| { + let (location, ..) = change.fields(); + if let ChangeRef::Unmerged { .. 
} = &change { + if let Err(insert_idx) = conflicting_paths.binary_search_by(|p| p.as_bstr().cmp(location)) { + conflicting_paths.insert(insert_idx, location.to_owned()); + } + cb(change) + } else if conflicting_paths + .binary_search_by(|p| p.as_bstr().cmp(location)) + .is_err() + { + cb(change) + } else { + Ok(Action::Continue) + } + }; + let mut resource_cache_storage = None; + let mut tracker = rewrite_options.map( + |RewriteOptions { + resource_cache, + rewrites, + find, + }| { + resource_cache_storage = Some((resource_cache, find)); + rewrites::Tracker::>::new(rewrites) + }, + ); + + let (mut lhs_storage, mut rhs_storage) = (lhs_iter.next(), rhs_iter.next()); + loop { + match (lhs_storage, rhs_storage) { + (Some(lhs), Some(rhs)) => { + match emit_unmerged_ignore_intent_to_add(rhs, &mut cb)? { + None => {} + Some(Action::Cancel) => return Ok(None), + Some(Action::Continue) => { + rhs_storage = rhs_iter.next(); + continue; + } + }; + + let (lhs_idx, lhs_path, lhs_entry) = lhs; + let (rhs_idx, rhs_path, rhs_entry) = rhs; + match lhs_path.cmp(rhs_path) { + Ordering::Less => match emit_deletion(lhs, &mut cb, tracker.as_mut())? { + Action::Continue => { + lhs_storage = lhs_iter.next(); + } + Action::Cancel => return Ok(None), + }, + Ordering::Equal => { + if lhs_entry.id != rhs_entry.id || lhs_entry.mode != rhs_entry.mode { + let change = ChangeRef::Modification { + location: Cow::Borrowed(rhs_path), + previous_index: lhs_idx, + previous_entry_mode: lhs_entry.mode, + previous_id: Cow::Borrowed(lhs_entry.id.as_ref()), + index: rhs_idx, + entry_mode: rhs_entry.mode, + id: Cow::Borrowed(rhs_entry.id.as_ref()), + }; + + let change = match tracker.as_mut() { + None => Some(change), + Some(tracker) => tracker.try_push_change(change, rhs_path), + }; + if let Some(change) = change { + match cb(change).map_err(|err| Error::Callback(err.into()))? 
{ + Action::Continue => {} + Action::Cancel => return Ok(None), + } + } + } + lhs_storage = lhs_iter.next(); + rhs_storage = rhs_iter.next(); + } + Ordering::Greater => match emit_addition(rhs, &mut cb, tracker.as_mut())? { + Action::Continue => { + rhs_storage = rhs_iter.next(); + } + Action::Cancel => return Ok(None), + }, + } + } + (Some(lhs), None) => match emit_deletion(lhs, &mut cb, tracker.as_mut())? { + Action::Cancel => return Ok(None), + Action::Continue => { + lhs_storage = lhs_iter.next(); + } + }, + (None, Some(rhs)) => match emit_addition(rhs, &mut cb, tracker.as_mut())? { + Action::Cancel => return Ok(None), + Action::Continue => { + rhs_storage = rhs_iter.next(); + } + }, + (None, None) => break, + } + } + + if let Some((mut tracker, (resource_cache, find))) = tracker.zip(resource_cache_storage) { + let mut cb_err = None; + let out = tracker.emit( + |dst, src| { + let change = if let Some(src) = src { + let (lhs_path, lhs_index, lhs_mode, lhs_id) = src.change.fields(); + let (rhs_path, rhs_index, rhs_mode, rhs_id) = dst.change.fields(); + ChangeRef::Rewrite { + source_location: Cow::Owned(lhs_path.into()), + source_index: lhs_index, + source_entry_mode: lhs_mode, + source_id: Cow::Owned(lhs_id.into()), + location: Cow::Owned(rhs_path.into()), + index: rhs_index, + entry_mode: rhs_mode, + id: Cow::Owned(rhs_id.into()), + copy: match src.kind { + rewrites::tracker::visit::SourceKind::Rename => false, + rewrites::tracker::visit::SourceKind::Copy => true, + }, + } + } else { + dst.change + }; + match cb(change) { + Ok(Action::Continue) => crate::tree::visit::Action::Continue, + Ok(Action::Cancel) => crate::tree::visit::Action::Cancel, + Err(err) => { + cb_err = Some(Error::Callback(err.into())); + crate::tree::visit::Action::Cancel + } + } + }, + resource_cache, + find, + |push| { + for (index, entry) in lhs.entries().iter().enumerate() { + let path = entry.path(rhs); + push( + ChangeRef::Modification { + location: Cow::Borrowed(path), + previous_index: 
0, /* does not matter */ + previous_entry_mode: entry.mode, + previous_id: Cow::Owned(entry.id.kind().null()), + index, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }, + path, + ); + } + Ok::<_, std::convert::Infallible>(()) + }, + )?; + + if let Some(err) = cb_err { + Err(err) + } else { + Ok(Some(out)) + } + } else { + Ok(None) + } +} + +fn emit_deletion<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'lhs BStr, &'lhs gix_index::Entry), + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + tracker: Option<&mut rewrites::Tracker>>, +) -> Result +where + E: Into>, +{ + let change = ChangeRef::Deletion { + location: Cow::Borrowed(path), + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }; + + let change = match tracker { + None => change, + Some(tracker) => match tracker.try_push_change(change, path) { + Some(change) => change, + None => return Ok(Action::Continue), + }, + }; + + cb(change).map_err(|err| Error::Callback(err.into())) +} + +fn emit_addition<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'rhs BStr, &'rhs gix_index::Entry), + mut cb: impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, + tracker: Option<&mut rewrites::Tracker>>, +) -> Result +where + E: Into>, +{ + if let Some(action) = emit_unmerged_ignore_intent_to_add((idx, path, entry), &mut cb)? 
{ + return Ok(action); + } + + let change = ChangeRef::Addition { + location: Cow::Borrowed(path), + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }; + + let change = match tracker { + None => change, + Some(tracker) => match tracker.try_push_change(change, path) { + Some(change) => change, + None => return Ok(Action::Continue), + }, + }; + + cb(change).map_err(|err| Error::Callback(err.into())) +} + +fn emit_unmerged_ignore_intent_to_add<'rhs, 'lhs: 'rhs, E>( + (idx, path, entry): (usize, &'rhs BStr, &'rhs gix_index::Entry), + cb: &mut impl FnMut(ChangeRef<'lhs, 'rhs>) -> Result, +) -> Result, Error> +where + E: Into>, +{ + if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { + return Ok(Some(Action::Continue)); + } + let stage = entry.stage(); + if stage == gix_index::entry::Stage::Unconflicted { + return Ok(None); + } + + Ok(Some( + cb(ChangeRef::Unmerged { + location: Cow::Borrowed(path), + stage, + index: idx, + entry_mode: entry.mode, + id: Cow::Borrowed(entry.id.as_ref()), + }) + .map_err(|err| Error::Callback(err.into()))?, + )) +} diff --git a/gix-diff/src/index/mod.rs b/gix-diff/src/index/mod.rs new file mode 100644 index 00000000000..0a66b3ca1e7 --- /dev/null +++ b/gix-diff/src/index/mod.rs @@ -0,0 +1,141 @@ +use bstr::BStr; +use std::borrow::Cow; + +/// The error returned by [`index()`](crate::index()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Cannot diff indices that contain sparse entries")] + IsSparse, + #[error("Unmerged entries aren't allowed in the left-hand index, only in the right-hand index")] + LhsHasUnmerged, + #[error("The callback indicated failure")] + Callback(#[source] Box), + #[error("Failure during rename tracking")] + RenameTracking(#[from] crate::rewrites::tracker::emit::Error), +} + +/// What to do after a [ChangeRef] was passed ot the callback of [`index()`](crate::index()). 
+#[derive(Default, Clone, Copy, PartialOrd, PartialEq, Ord, Eq, Hash)] +pub enum Action { + /// Continue the operation. + #[default] + Continue, + /// Stop the operation immediately. + /// + /// This is useful if one just wants to determine if something changed or not. + Cancel, +} + +/// Options to configure how rewrites are tracked as part of the [`index()`](crate::index()) call. +pub struct RewriteOptions<'a, Find> +where + Find: gix_object::FindObjectOrHeader, +{ + /// The cache to be used when rename-tracking by similarity is enabled, typically the default. + /// Note that it's recommended to call [`clear_resource_cache()`](`crate::blob::Platform::clear_resource_cache()`) + /// between the calls to avoid runaway memory usage, as the cache isn't limited. + pub resource_cache: &'a mut crate::blob::Platform, + /// A way to lookup objects from the object database, for use in similarity checks. + pub find: &'a Find, + /// Configure how rewrites are tracked. + pub rewrites: crate::Rewrites, +} + +/// Identify a change that would have to be applied to `lhs` to obtain `rhs`, as provided in [`index()`](crate::index()). +/// +/// Note that all variants are unconflicted entries, unless it's the [`Self::Unmerged`] one. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ChangeRef<'lhs, 'rhs> { + /// An entry was added to `rhs`. + Addition { + /// The location of the newly added entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was removed from `rhs`. + Deletion { + /// The location the entry that doesn't exist in `rhs`. + location: Cow<'lhs, BStr>, + /// The index into the entries array of `lhs` for full access. + index: usize, + /// The mode of the entry in `lhs`. 
+ entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `lhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was modified, i.e. has changed its content or its mode. + Modification { + /// The location of the modified entry both in `lhs` and `rhs`. + location: Cow<'rhs, BStr>, + /// The index into the entries array of `lhs` for full access. + previous_index: usize, + /// The previous mode of the entry, in `lhs`. + previous_entry_mode: gix_index::entry::Mode, + /// The previous object id of the entry, in `lhs`. + previous_id: Cow<'lhs, gix_hash::oid>, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, + /// An entry was renamed or copied from `lhs` to `rhs`. + /// + /// A rename is effectively fusing together the `Deletion` of the source and the `Addition` of the destination. + Rewrite { + /// The location of the source of the rename or copy operation, in `lhs`. + source_location: Cow<'lhs, BStr>, + /// The index of the entry before the rename, into the entries array of `rhs` for full access. + source_index: usize, + /// The mode of the entry before the rewrite, in `lhs`. + source_entry_mode: gix_index::entry::Mode, + /// The object id of the entry before the rewrite. + /// + /// Note that this is the same as `id` if we require the [similarity to be 100%](super::Rewrites::percentage), but may + /// be different otherwise. + source_id: Cow<'lhs, gix_hash::oid>, + + /// The current location of the entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The index of the entry after the rename, into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry after the rename in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry after the rename in `rhs`. 
+ id: Cow<'rhs, gix_hash::oid>, + + /// If true, this rewrite is created by copy, and `source_id` is pointing to its source. Otherwise, it's a rename, + /// and `source_id` points to a deleted object, as renames are tracked as deletions and additions of the same + /// or similar content. + copy: bool, + }, + /// One of up to three unmerged entries that are provided in order, one for each stage, ordered + /// by `location` and `stage`. + /// + /// Unmerged entries also don't participate in rename tracking, and they are never present in `lhs`. + Unmerged { + /// The current location of the entry in `rhs`. + location: Cow<'rhs, BStr>, + /// The stage of the entry, either *base*, *ours*, or *theirs*. + stage: gix_index::entry::Stage, + /// The index into the entries array of `rhs` for full access. + index: usize, + /// The mode of the entry in `rhs`. + entry_mode: gix_index::entry::Mode, + /// The object id of the entry in `rhs`. + id: Cow<'rhs, gix_hash::oid>, + }, +} + +/// The fully-owned version of [`ChangeRef`]. 
+pub type Change = ChangeRef<'static, 'static>; + +mod change; +pub(super) mod function; diff --git a/gix-diff/src/lib.rs b/gix-diff/src/lib.rs index ce2451176f5..f438ee4c275 100644 --- a/gix-diff/src/lib.rs +++ b/gix-diff/src/lib.rs @@ -58,6 +58,12 @@ pub mod tree_with_rewrites; #[cfg(feature = "blob")] pub use tree_with_rewrites::function::diff as tree_with_rewrites; +/// +#[cfg(feature = "index")] +pub mod index; +#[cfg(feature = "index")] +pub use index::function::diff as index; + /// #[cfg(feature = "blob")] pub mod blob; diff --git a/gix-diff/src/tree/recorder.rs b/gix-diff/src/tree/recorder.rs index 69055ba16e7..9a3783447b9 100644 --- a/gix-diff/src/tree/recorder.rs +++ b/gix-diff/src/tree/recorder.rs @@ -89,6 +89,9 @@ impl Recorder { } fn push_element(&mut self, name: &BStr) { + if name.is_empty() { + return; + } if !self.path.is_empty() { self.path.push(b'/'); } diff --git a/gix-diff/src/tree_with_rewrites/function.rs b/gix-diff/src/tree_with_rewrites/function.rs index b37856a3379..df0b0be1a32 100644 --- a/gix-diff/src/tree_with_rewrites/function.rs +++ b/gix-diff/src/tree_with_rewrites/function.rs @@ -239,6 +239,10 @@ mod tree_to_changes { } impl gix_traverse::tree::Visit for Delegate<'_> { + fn pop_back_tracked_path_and_set_current(&mut self) { + self.recorder.pop_back_tracked_path_and_set_current(); + } + fn pop_front_tracked_path_and_set_current(&mut self) { self.recorder.pop_front_tracked_path_and_set_current(); } diff --git a/gix-diff/tests/Cargo.toml b/gix-diff/tests/Cargo.toml index 9197a86be5b..35645e05d90 100644 --- a/gix-diff/tests/Cargo.toml +++ b/gix-diff/tests/Cargo.toml @@ -17,8 +17,9 @@ name = "diff" path = "diff/main.rs" [dev-dependencies] -insta = "1.40.0" gix-diff = { path = ".." 
} +gix-index = { version = "^0.37.0", path = "../../gix-index" } +gix-pathspec = { version = "^0.8.1", path = "../../gix-pathspec" } gix-hash = { path = "../../gix-hash" } gix-fs = { path = "../../gix-fs" } gix-worktree = { path = "../../gix-worktree" } @@ -27,5 +28,7 @@ gix-odb = { path = "../../gix-odb" } gix-filter = { path = "../../gix-filter" } gix-traverse = { path = "../../gix-traverse" } gix-testtools = { path = "../../tests/tools" } + +insta = "1.40.0" shell-words = "1" pretty_assertions = "1.4.0" diff --git a/gix-diff/tests/diff/index.rs b/gix-diff/tests/diff/index.rs new file mode 100644 index 00000000000..7e936a309cf --- /dev/null +++ b/gix-diff/tests/diff/index.rs @@ -0,0 +1,1367 @@ +use gix_diff::index::Change; +use gix_diff::rewrites::{Copies, CopySource}; +use gix_diff::Rewrites; +use gix_object::bstr::BStr; + +#[test] +fn empty_to_new_tree_without_rename_tracking() -> crate::Result { + let changes = collect_changes_no_renames(None, "c1 - initial").expect("really just an addition - nothing to track"); + insta::assert_debug_snapshot!(changes, @r#" + [ + Addition { + location: "a", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "b", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "d", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "dir/c", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + { + let (lhs, rhs, _cache, _odb, mut pathspec) = repo_with_indices(None, "c1 - initial", None)?; + let err = gix_diff::index( + &lhs, + &rhs, + |_change| Err(std::io::Error::new(std::io::ErrorKind::Other, "custom error")), + None::>, + &mut pathspec, + &mut |_, _, _, _| true, + ) + .unwrap_err(); + assert_eq!( + format!("{err:?}"), + r#"Callback(Custom { 
kind: Other, error: "custom error" })"#, + "custom errors made visible and not squelched" + ); + } + Ok(()) +} + +#[test] +fn changes_against_modified_tree_with_filename_tracking() -> crate::Result { + let changes = collect_changes_no_renames("c2", "c3-modification")?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "a", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(78981922613b2afb6025042ff6bd878ac1994e85), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(b4f17b61de71d9b2e54ac9e62b1629ae2d97a6a7), + }, + Modification { + location: "dir/c", + previous_index: 3, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(6695780ceb14b05e076a99bbd2babf34723b3464), + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + }, + ] + "#); + Ok(()) +} + +#[test] +fn renames_by_identity() -> crate::Result { + for (from, to, expected, assert_msg, track_empty) in [ + ( + "c3-modification", + "r1-identity", + vec![BStr::new("a"), "dir/a-moved".into()], + "one rename and nothing else", + false, + ), + ( + "c4 - add identical files", + "r2-ambiguous", + vec![ + "s1".into(), + "b1".into(), + "s2".into(), + "b2".into(), + "s3".into(), + "z".into(), + ], + "multiple possible sources decide by ordering everything lexicographically", + true, + ), + ( + "c4 - add identical files", + "r2-ambiguous", + vec![], + "nothing is tracked with `track_empty = false`", + false, + ), + ( + "c5 - add links", + "r4-symlinks", + vec!["link-1".into(), "renamed-link-1".into()], + "symlinks are only tracked by identity", + false, + ), + ( + "r1-identity", + "c4 - add identical files", + vec![], + "not having any renames is OK as well", + false, + ), + ( + "tc1-identity", + "tc1-identity", + vec![], + "copy tracking is off by default", + false, + ), + ] { + for percentage in [None, Some(0.5)] { + let (changes, out) = collect_changes_opts( + from, + to, + Some(Rewrites { + percentage, 
+ track_empty, + ..Default::default() + }), + )?; + let actual: Vec<_> = changes + .into_iter() + .flat_map(|c| match c { + Change::Rewrite { + source_location, + location, + copy, + .. + } => { + assert!(!copy); + vec![source_location, location] + } + _ => vec![], + }) + .collect(); + + assert_eq!(actual, expected, "{assert_msg}"); + #[cfg(not(windows))] + assert_eq!( + out.expect("present as rewrites are configured").num_similarity_checks, + 0, + "there are no fuzzy checks in if everything was resolved by identity only" + ); + } + } + Ok(()) +} + +#[test] +fn rename_by_similarity() -> crate::Result { + insta::allow_duplicates! { + for percentage in [ + None, + Some(0.76), /*cutoff point where git stops seeing it as equal */ + ] { + let (changes, out) = collect_changes_opts( + "r2-ambiguous", + "r3-simple", + Some(Rewrites { + percentage, + ..Default::default() + }), + ).expect("errors can only happen with IO or ODB access fails"); + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(61780798228d17af2d34fce4cfbdf35556832472), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + }, + Deletion { + location: "dir/c", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + }, + Addition { + location: "dir/c-moved", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f01e8ddf5adc56985b9a1cda6d7c7ef9e3abe034), + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, if percentage.is_some() { 1 } else { 0 }); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + } + } + + let (changes, out) = collect_changes_opts( + "r2-ambiguous", + "r3-simple", + Some(Rewrites { + percentage: Some(0.6), + limit: 1, // has 
no effect as it's just one item here. + ..Default::default() + }), + ) + .expect("it found all items at the cut-off point, similar to git"); + + insta::assert_debug_snapshot!(changes, @r#" + [ + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(61780798228d17af2d34fce4cfbdf35556832472), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + }, + Rewrite { + source_location: "dir/c", + source_index: 5, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(40006fcef15a8853a1b7ae186d93b7d680fd29cf), + location: "dir/c-moved", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f01e8ddf5adc56985b9a1cda6d7c7ef9e3abe034), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 1); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + Ok(()) +} + +#[test] +fn renames_by_similarity_with_limit() -> crate::Result { + let (changes, out) = collect_changes_opts( + "c6", + "r5", + Some(Rewrites { + limit: 1, // prevent fuzzy tracking from happening + ..Default::default() + }), + )?; + assert_eq!( + changes.iter().filter(|c| matches!(c, Change::Rewrite { .. 
})).count(), + 0, + "fuzzy tracking is effectively disabled due to limit" + ); + let actual: Vec<_> = changes.iter().map(|c| c.fields().0).collect(); + assert_eq!(actual, ["f1", "f1-renamed", "f2", "f2-renamed"],); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 4); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_by_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "c7", + "tc1-identity", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFiles, + percentage: None, + }), + limit: 1, // the limit isn't actually used for identity based checks + ..Default::default() + }), + )?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "c1", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "c2", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(f00c965d8307308469e537302baa73048488f162), + location: "dir/c3", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(f00c965d8307308469e537302baa73048488f162), + copy: true, + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + 
assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_by_similarity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc1-identity", + "tc2-similarity", + Some(Rewrites { + copies: Some(Copies::default()), + ..Default::default() + }), + )?; + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c4", + index: 6, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c5", + index: 7, + entry_mode: Mode( + FILE, + ), + id: Sha1(08fe19ca4d2f79624f35333157d610811efc1aed), + copy: true, + }, + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "dir/c6", + index: 12, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 2, + "two are similar, the other one is identical" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_in_entire_tree_by_similarity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies::default()), + ..Default::default() + }), + )?; + assert_eq!( + changes.iter().filter(|c| matches!(c, Change::Rewrite { .. 
})).count(), + 0, + "needs --find-copies-harder to detect rewrites here" + ); + let actual: Vec<_> = changes.iter().map(|c| c.fields().0).collect(); + assert_eq!(actual, ["b", "c6", "c7", "newly-added"]); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 3, + "it does have some candidates, probably for rename tracking" + ); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFilesAndAllSources, + ..Default::default() + }), + ..Default::default() + }), + )?; + + // As the full-tree traversal order is different, it sees candidates in different order. + // Let's keep this as expectations, as in future there might be a candidate-based search that considers filenames + // or similarity in names. 
+ insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c6", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "r/c3di", + source_index: 12, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + location: "c7", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + Rewrite { + source_location: "c5", + source_index: 7, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(08fe19ca4d2f79624f35333157d610811efc1aed), + location: "newly-added", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(97b3d1a5707f8a11fa5fa8bc6c3bd7b3965601fd), + copy: true, + }, + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(f198d0640214092732566fb00543163845c8252c), + }, + ] + "#); + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 4); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn copies_in_entire_tree_by_similarity_with_limit() -> crate::Result { + let (changes, out) = collect_changes_opts( + "tc2-similarity", + "tc3-find-harder", + Some(Rewrites { + copies: Some(Copies { + source: CopySource::FromSetOfModifiedFilesAndAllSources, + ..Default::default() + }), + limit: 2, // similarity checks can't be made that way + track_empty: false, + ..Default::default() + }), + )?; + + // Again, it finds a different first match for the rewrite 
compared to tree-traversal, expected for now. + insta::assert_debug_snapshot!(changes, @r#" + [ + Rewrite { + source_location: "base", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + location: "c6", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(3bb459b831ea471b9cd1cbb7c6d54a74251a711b), + copy: true, + }, + Rewrite { + source_location: "r/c3di", + source_index: 12, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + location: "c7", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(cf7a729ca69bfabd0995fc9b083e86a18215bd91), + copy: true, + }, + Modification { + location: "b", + previous_index: 0, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(54781fa52cf133fa9d0bf59cfe2ef2621b5ad29f), + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(f198d0640214092732566fb00543163845c8252c), + }, + Addition { + location: "newly-added", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(97b3d1a5707f8a11fa5fa8bc6c3bd7b3965601fd), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 0, "similarity checks can't run"); + assert_eq!( + out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0, + "no limit configured" + ); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 21); + + Ok(()) +} + +#[test] +fn realistic_renames_by_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r1-base", + "r1-change", + Some(Rewrites { + copies: Some(Copies::default()), + limit: 1, + track_empty: true, + ..Default::default() + }), + )?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Rewrite { + source_location: "git-index/src/file.rs", + source_index: 18, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "git-index/src/file/mod.rs", 
+ index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + Addition { + location: "git-index/tests/index/file/access.rs", + index: 45, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Modification { + location: "git-index/tests/index/file/mod.rs", + previous_index: 45, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + index: 46, + entry_mode: Mode( + FILE, + ), + id: Sha1(8ba3a16384aacc37d01564b28401755ce8053f51), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!(out.num_similarity_checks, 1); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_disabled() -> crate::Result { + let changes = collect_changes_no_renames("r1-base", "r1-change")?; + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Deletion { + location: "git-index/src/file.rs", + index: 18, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "git-index/src/file/mod.rs", + index: 19, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "git-index/tests/index/file/access.rs", + index: 45, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Modification { + location: "git-index/tests/index/file/mod.rs", + previous_index: 45, + previous_entry_mode: Mode( + FILE, + ), + previous_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + index: 46, + entry_mode: Mode( + FILE, + ), + id: Sha1(8ba3a16384aacc37d01564b28401755ce8053f51), + }, + ] + "#); + Ok(()) +} + +#[test] +fn realistic_renames_disabled_3() -> crate::Result { + let changes = 
collect_changes_no_renames("r3-base", "r3-change")?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Addition { + location: "src/ein.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "src/gix.rs", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "src/plumbing-cli.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "src/porcelain-cli.rs", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + Ok(()) +} + +#[test] +fn realistic_renames_by_identity_3() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r3-base", + "r3-change", + Some(Rewrites { + copies: Some(Copies::default()), + limit: 1, + track_empty: true, + ..Default::default() + }), + )?; + + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Rewrite { + source_location: "src/plumbing-cli.rs", + source_index: 0, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "src/ein.rs", + index: 0, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + Rewrite { + source_location: "src/porcelain-cli.rs", + source_index: 4, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "src/gix.rs", + index: 1, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + 
assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_2() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r2-base", + "r2-change", + Some(Rewrites { + copies: Some(Copies::default()), + track_empty: false, + ..Default::default() + }), + )?; + + // We cannot capture renames if track-empty is disabled, as these are actually empty, + // and we can't take directory-shortcuts here (i.e. tracking knows no directories here + // as is the case with trees where we traverse breadth-first. + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Deletion { + location: "git-sec/CHANGELOG.md", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/Cargo.toml", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/identity.rs", + index: 5, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/lib.rs", + index: 6, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/permission.rs", + index: 7, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/src/trust.rs", + index: 8, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/tests/identity/mod.rs", + index: 9, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Deletion { + location: "git-sec/tests/sec.rs", + index: 10, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/CHANGELOG.md", + index: 231, + entry_mode: Mode( + FILE, + ), + id: 
Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/Cargo.toml", + index: 232, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/identity.rs", + index: 233, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/lib.rs", + index: 234, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/permission.rs", + index: 235, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/src/trust.rs", + index: 236, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/tests/identity/mod.rs", + index: 237, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Addition { + location: "gix-sec/tests/sec.rs", + index: 238, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + Ok(()) +} + +#[test] +fn realistic_renames_3_without_identity() -> crate::Result { + let (changes, out) = collect_changes_opts( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + )?; + + // We don't actually track directory renames, only files show up. 
+ insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Rewrite { + source_location: "src/plumbing/options.rs", + source_index: 4, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + location: "src/plumbing-renamed/options/mod.rs", + index: 4, + entry_mode: Mode( + FILE, + ), + id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/mod.rs", + source_index: 3, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + location: "src/plumbing-renamed/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + copy: false, + }, + Rewrite { + source_location: "src/plumbing/main.rs", + source_index: 2, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + location: "src/plumbing-renamed/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + copy: false, + }, + ] + "#); + + let out = out.expect("tracking enabled"); + assert_eq!( + out.num_similarity_checks, 0, + "similarity checks disabled, and not necessary" + ); + assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0); + assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0); + + let (changes, _out) = collect_changes_opts_with_pathspec( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + Some("src/plumbing/m*"), + )?; + + // Pathspecs are applied in advance, which affects rename tracking. 
+ insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Deletion { + location: "src/plumbing/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Deletion { + location: "src/plumbing/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + }, + ] + "#); + + let (changes, _out) = collect_changes_opts_with_pathspec( + "r4-base", + "r4-dir-rename-non-identity", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + Some("src/plumbing-renamed/m*"), + )?; + // One can also get the other side of the rename + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Addition { + location: "src/plumbing-renamed/main.rs", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Addition { + location: "src/plumbing-renamed/mod.rs", + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f), + }, + ] + "#); + + Ok(()) +} + +#[test] +fn unmerged_entries_and_intent_to_add() -> crate::Result { + let (changes, _out) = collect_changes_opts( + "r4-dir-rename-non-identity", + ".git/index", + Some(Rewrites { + copies: None, + percentage: None, + limit: 0, + track_empty: false, + }), + )?; + + // each unmerged entry is emitted separately, and no entry is emitted for + // paths that are mentioned there. Intent-to-add is transparent. 
+ // All that with rename tracking… + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Base, + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Ours, + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + ] + "#); + + let changes = collect_changes_no_renames("r4-dir-rename-non-identity", ".git/index")?; + // …or without + insta::assert_debug_snapshot!(changes.into_iter().collect::>(), @r#" + [ + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Base, + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + Unmerged { + location: "src/plumbing-renamed/main.rs", + stage: Ours, + index: 3, + entry_mode: Mode( + FILE, + ), + id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d), + }, + ] + "#); + + let (index, _, _, _, _) = repo_with_indices(".git/index", ".git/index", None)?; + assert_eq!( + index.entry_by_path("will-add".into()).map(|e| e.id), + Some(hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")), + "the file is there, but we don't see it" + ); + + Ok(()) +} + +mod util { + use gix_diff::rewrites; + use std::convert::Infallible; + use std::path::{Path, PathBuf}; + + fn repo_workdir() -> crate::Result { + gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_rewrites_repo.sh") + } + + pub fn repo_with_indices( + lhs: impl Into>, + rhs: impl Into>, + patterns: impl IntoIterator, + ) -> gix_testtools::Result<( + gix_index::State, + gix_index::State, + gix_diff::blob::Platform, + gix_odb::Handle, + gix_pathspec::Search, + )> { + let root = repo_workdir()?; + let odb = gix_odb::at(root.join(".git/objects"))?; + let lhs = read_index(&odb, &root, lhs.into())?; + let rhs = read_index(&odb, &root, rhs.into())?; + + let cache = 
gix_diff::blob::Platform::new( + Default::default(), + gix_diff::blob::Pipeline::new(Default::default(), Default::default(), Vec::new(), Default::default()), + Default::default(), + gix_worktree::Stack::new( + &root, + gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::default()), + Default::default(), + Vec::new(), + Vec::new(), + ), + ); + let pathspecs = gix_pathspec::Search::from_specs( + patterns + .into_iter() + .map(|p| gix_pathspec::Pattern::from_bytes(p.as_bytes(), Default::default()).expect("valid pattern")), + None, + &root, + )?; + Ok((lhs, rhs, cache, odb, pathspecs)) + } + + pub fn collect_changes_no_renames( + lhs: impl Into>, + rhs: impl Into>, + ) -> gix_testtools::Result> { + Ok(collect_changes_opts(lhs, rhs, None)?.0) + } + + pub fn collect_changes_opts( + lhs: impl Into>, + rhs: impl Into>, + options: Option, + ) -> gix_testtools::Result<(Vec, Option)> { + collect_changes_opts_with_pathspec(lhs, rhs, options, None) + } + + pub fn collect_changes_opts_with_pathspec( + lhs: impl Into>, + rhs: impl Into>, + options: Option, + patterns: impl IntoIterator, + ) -> gix_testtools::Result<(Vec, Option)> { + let (from, to, mut cache, odb, mut pathspecs) = repo_with_indices(lhs, rhs, patterns)?; + let mut out = Vec::new(); + let rewrites_info = gix_diff::index( + &from, + &to, + |change| -> Result<_, Infallible> { + out.push(change.into_owned()); + Ok(gix_diff::index::Action::Continue) + }, + options.map(|rewrites| gix_diff::index::RewriteOptions { + rewrites, + resource_cache: &mut cache, + find: &odb, + }), + &mut pathspecs, + &mut |_, _, _, _| false, + )?; + Ok((out, rewrites_info)) + } + + fn read_index( + odb: impl gix_object::Find, + root: &Path, + tree: Option<&str>, + ) -> gix_testtools::Result { + let Some(tree) = tree else { + return Ok(gix_index::State::new(gix_hash::Kind::Sha1)); + }; + if tree == ".git/index" { + Ok(gix_index::File::at(root.join(tree), gix_hash::Kind::Sha1, false, Default::default())?.into()) + 
} else { + let tree_id_path = root.join(tree).with_extension("tree"); + let hex_id = std::fs::read_to_string(&tree_id_path).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Could not read '{}': {}", tree_id_path.display(), err), + ) + })?; + let tree_id = gix_hash::ObjectId::from_hex(hex_id.trim().as_bytes())?; + Ok(gix_index::State::from_tree(&tree_id, odb, Default::default())?) + } + } +} +use crate::hex_to_id; +use util::{collect_changes_no_renames, collect_changes_opts, collect_changes_opts_with_pathspec, repo_with_indices}; diff --git a/gix-diff/tests/diff/main.rs b/gix-diff/tests/diff/main.rs index 2163b5d3b01..a6be530ffdb 100644 --- a/gix-diff/tests/diff/main.rs +++ b/gix-diff/tests/diff/main.rs @@ -5,6 +5,7 @@ fn hex_to_id(hex: &str) -> gix_hash::ObjectId { } mod blob; +mod index; mod rewrites; mod tree; mod tree_with_rewrites; diff --git a/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar b/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar index 878a4113ef7..b36453ef90e 100644 Binary files a/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar and b/gix-diff/tests/fixtures/generated-archives/make_diff_for_rewrites_repo.tar differ diff --git a/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh b/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh index a03188ed553..10b817cb3e2 100755 --- a/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh +++ b/gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh @@ -806,4 +806,18 @@ git mv src/plumbing src/plumbing-renamed git commit -m "r4-dir-rename-non-identity" store_tree "r4-dir-rename-non-identity" +git checkout -b conflicting @~1 +git rm src/plumbing/main.rs +git commit -m "remove main.rs" + +git checkout main +git merge conflicting || : + +echo not-empty >will-add +git add --intent-to-add will-add + +# a file with skip-worktree flag, which has no bearing on tree/index diffs. 
+git update-index --skip-worktree src/shared.rs +rm src/shared.rs + mv ../*.tree . \ No newline at end of file diff --git a/gix-hash/src/oid.rs b/gix-hash/src/oid.rs index 66fdf3f5568..4e67dadf9f1 100644 --- a/gix-hash/src/oid.rs +++ b/gix-hash/src/oid.rs @@ -44,9 +44,9 @@ pub struct HexDisplay<'a> { impl std::fmt::Display for HexDisplay<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut hex = Kind::hex_buf(); - let max_len = self.inner.hex_to_buf(hex.as_mut()); - let hex = std::str::from_utf8(&hex[..self.hex_len.min(max_len)]).expect("ascii only in hex"); - f.write_str(hex) + let hex = self.inner.hex_to_buf(hex.as_mut()); + let max_len = hex.len(); + f.write_str(&hex[..self.hex_len.min(max_len)]) } } @@ -152,22 +152,21 @@ impl oid { /// Sha1 specific methods impl oid { - /// Write ourselves to the `out` in hexadecimal notation, returning the amount of written bytes. + /// Write ourselves to the `out` in hexadecimal notation, returning the hex-string ready for display. /// /// **Panics** if the buffer isn't big enough to hold twice as many bytes as the current binary size. #[inline] #[must_use] - pub fn hex_to_buf(&self, buf: &mut [u8]) -> usize { + pub fn hex_to_buf<'a>(&self, buf: &'a mut [u8]) -> &'a mut str { let num_hex_bytes = self.bytes.len() * 2; - faster_hex::hex_encode(&self.bytes, &mut buf[..num_hex_bytes]).expect("to count correctly"); - num_hex_bytes + faster_hex::hex_encode(&self.bytes, &mut buf[..num_hex_bytes]).expect("to count correctly") } /// Write ourselves to `out` in hexadecimal notation. 
#[inline] pub fn write_hex_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> { let mut hex = Kind::hex_buf(); - let hex_len = self.hex_to_buf(&mut hex); + let hex_len = self.hex_to_buf(&mut hex).len(); out.write_all(&hex[..hex_len]) } @@ -210,10 +209,8 @@ impl<'a> From<&'a [u8; SIZE_OF_SHA1_DIGEST]> for &'a oid { impl std::fmt::Display for &oid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for b in self.as_bytes() { - write!(f, "{b:02x}")?; - } - Ok(()) + let mut buf = Kind::hex_buf(); + f.write_str(self.hex_to_buf(&mut buf)) } } diff --git a/gix-index/src/init.rs b/gix-index/src/init.rs index a9dfd150a6e..968651806ce 100644 --- a/gix-index/src/init.rs +++ b/gix-index/src/init.rs @@ -3,8 +3,8 @@ pub mod from_tree { use std::collections::VecDeque; use bstr::{BStr, BString, ByteSlice, ByteVec}; - use gix_object::{tree, tree::EntryKind, FindExt}; - use gix_traverse::tree::{breadthfirst, visit::Action, Visit}; + use gix_object::{tree, tree::EntryKind}; + use gix_traverse::tree::{depthfirst, visit::Action, Visit}; use crate::{ entry::{Flags, Mode, Stat}, @@ -21,7 +21,7 @@ pub mod from_tree { source: gix_validate::path::component::Error, }, #[error(transparent)] - Traversal(#[from] gix_traverse::tree::breadthfirst::Error), + Traversal(#[from] gix_traverse::tree::depthfirst::Error), } /// Initialization @@ -58,12 +58,8 @@ pub mod from_tree { Find: gix_object::Find, { let _span = gix_features::trace::coarse!("gix_index::State::from_tree()"); - let mut buf = Vec::new(); - let root = objects - .find_tree_iter(tree, &mut buf) - .map_err(breadthfirst::Error::from)?; let mut delegate = CollectEntries::new(validate); - match breadthfirst(root, breadthfirst::State::default(), &objects, &mut delegate) { + match depthfirst(tree.to_owned(), depthfirst::State::default(), &objects, &mut delegate) { Ok(()) => {} Err(gix_traverse::tree::breadthfirst::Error::Cancelled) => { let (path, err) = delegate @@ -76,15 +72,17 @@ pub mod from_tree { } let 
CollectEntries { - mut entries, + entries, path_backing, path: _, path_deque: _, validate: _, - invalid_path: _, + invalid_path, } = delegate; - entries.sort_by(|a, b| Entry::cmp_filepaths(a.path_in(&path_backing), b.path_in(&path_backing))); + if let Some((path, err)) = invalid_path { + return Err(Error::InvalidComponent { path, source: err }); + } Ok(State { object_hash: tree.kind(), @@ -126,6 +124,9 @@ pub mod from_tree { } fn push_element(&mut self, name: &BStr) { + if name.is_empty() { + return; + } if !self.path.is_empty() { self.path.push(b'/'); } @@ -182,6 +183,10 @@ pub mod from_tree { } impl Visit for CollectEntries { + fn pop_back_tracked_path_and_set_current(&mut self) { + self.path = self.path_deque.pop_back().unwrap_or_default(); + } + fn pop_front_tracked_path_and_set_current(&mut self) { self.path = self .path_deque diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index f13d9500383..ec86bc75579 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -1,8 +1,7 @@ +use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; use bstr::BStr; use winnow::{error::ParserError, prelude::*}; -use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; - impl<'a> TreeRefIter<'a> { /// Instantiate an iterator from the given tree data. pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> { @@ -126,6 +125,21 @@ impl<'a> TreeRefIter<'a> { pub fn entries(self) -> Result>, crate::decode::Error> { self.collect() } + + /// Return the offset in bytes that our data advanced from `buf`, the original buffer + /// to the beginning of the data of the tree. + /// + /// Then the tree-iteration can be resumed at the entry that would otherwise be returned next. 
+ pub fn offset_to_next_entry(&self, buf: &[u8]) -> usize { + let before = (*buf).as_ptr(); + let after = (*self.data).as_ptr(); + + debug_assert!( + before <= after, + "`TreeRefIter::offset_to_next_entry(): {after:?} <= {before:?}) violated" + ); + (after as usize - before as usize) / std::mem::size_of::() + } } impl<'a> Iterator for TreeRefIter<'a> { diff --git a/gix-object/tests/object/tree/iter.rs b/gix-object/tests/object/tree/iter.rs index 1f597499238..b4717493a29 100644 --- a/gix-object/tests/object/tree/iter.rs +++ b/gix-object/tests/object/tree/iter.rs @@ -23,6 +23,24 @@ fn error_handling() { ); } +#[test] +fn offset_to_next_entry() { + let buf = fixture_name("tree", "everything.tree"); + let mut iter = TreeRefIter::from_bytes(&buf); + assert_eq!(iter.offset_to_next_entry(&buf), 0, "first entry is always at 0"); + iter.next(); + + let actual = iter.offset_to_next_entry(&buf); + assert_eq!(actual, 31, "now the offset increases"); + assert_eq!( + TreeRefIter::from_bytes(&buf[actual..]) + .next() + .map(|e| e.unwrap().filename), + iter.next().map(|e| e.unwrap().filename), + "One can now start the iteration at a certain entry" + ); +} + #[test] fn everything() -> crate::Result { assert_eq!( diff --git a/gix-odb/src/store_impls/loose/mod.rs b/gix-odb/src/store_impls/loose/mod.rs index 17e4a33d65a..9becb5cd4a4 100644 --- a/gix-odb/src/store_impls/loose/mod.rs +++ b/gix-odb/src/store_impls/loose/mod.rs @@ -42,10 +42,9 @@ impl Store { fn hash_path(id: &gix_hash::oid, mut root: PathBuf) -> PathBuf { let mut hex = gix_hash::Kind::hex_buf(); - let hex_len = id.hex_to_buf(hex.as_mut()); - let buf = std::str::from_utf8(&hex[..hex_len]).expect("ascii only in hex"); - root.push(&buf[..2]); - root.push(&buf[2..]); + let hex = id.hex_to_buf(hex.as_mut()); + root.push(&hex[..2]); + root.push(&hex[2..]); root } diff --git a/gix-pack/src/data/output/count/objects/tree.rs b/gix-pack/src/data/output/count/objects/tree.rs index 3f464217b8b..1187fa15baf 100644 --- 
a/gix-pack/src/data/output/count/objects/tree.rs +++ b/gix-pack/src/data/output/count/objects/tree.rs @@ -94,6 +94,8 @@ pub mod traverse { where H: InsertImmutable, { + fn pop_back_tracked_path_and_set_current(&mut self) {} + fn pop_front_tracked_path_and_set_current(&mut self) {} fn push_back_tracked_path_component(&mut self, _component: &BStr) {} diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 672beafaf99..6e0ed72befe 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -2,10 +2,23 @@ //! of the repository state, like comparisons between… //! //! * index and working tree -//! * index and tree -//! * find untracked files +//! * *tree and index* //! -//! While also being able to check check if the working tree is dirty, quickly. +//! …while also being able to check if the working tree is dirty, quickly, by instructing the operation to stop once the first +//! change was found. +//! +//! ### Tree-Index Status +//! +//! This status is not actually implemented here, as no direct tree-to-index comparison exists. Instead, one creates an Index from a tree +//! and then diffs two indices with `gix_diff::index(index_from_tree, usually_dot_git_index)`. This adds about 15% to the runtime +//! and comes at the cost of another index in memory. +//! Once there are generators, implementing depth-first tree iteration should become trivial, but for now it's very hard if one +//! wants to return referenced state of the iterator (which is not possible). +//! +//! ### Difference to `gix-diff` +//! +//! Technically, `status` is just another form of diff between different kinds of sides, i.e. an index and a working tree. +//! This is the difference to `gix-diff`, which compares only similar items. //! //!
### Feature Flags #![cfg_attr( diff --git a/gix-traverse/src/tree/breadthfirst.rs b/gix-traverse/src/tree/breadthfirst.rs index 441d4c50d38..5a0342c337d 100644 --- a/gix-traverse/src/tree/breadthfirst.rs +++ b/gix-traverse/src/tree/breadthfirst.rs @@ -2,7 +2,8 @@ use std::collections::VecDeque; use gix_hash::ObjectId; -/// The error is part of the item returned by the [`traverse()`][impl_::traverse()] function. +/// The error is part of the item returned by the [`breadthfirst()`](crate::tree::breadthfirst()) and +///[`depthfirst()`](crate::tree::depthfirst()) functions. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -28,7 +29,7 @@ impl State { } } -pub(crate) mod impl_ { +pub(super) mod function { use std::borrow::BorrowMut; use gix_object::{FindExt, TreeRefIter}; @@ -38,6 +39,8 @@ pub(crate) mod impl_ { /// Start a breadth-first iteration over the `root` trees entries. /// + /// Note that non-trees will be listed first, so the natural order of entries within a tree is lost. + /// /// * `root` /// * the tree to iterate in a nested fashion. /// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing @@ -46,9 +49,9 @@ pub(crate) mod impl_ { /// an iterator over entries if the object is present and is a tree. Caching should be implemented within this function /// as needed. The return value is `Option` which degenerates all error information. Not finding a commit should also /// be considered an errors as all objects in the tree DAG should be present in the database. Hence [`Error::Find`] should - /// be escalated into a more specific error if its encountered by the caller. + /// be escalated into a more specific error if it's encountered by the caller. /// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use. 
- pub fn traverse( + pub fn breadthfirst( root: TreeRefIter<'_>, mut state: StateMut, objects: Find, diff --git a/gix-traverse/src/tree/depthfirst.rs b/gix-traverse/src/tree/depthfirst.rs new file mode 100644 index 00000000000..5b0c9eab79c --- /dev/null +++ b/gix-traverse/src/tree/depthfirst.rs @@ -0,0 +1,112 @@ +pub use super::breadthfirst::Error; + +/// The state used and potentially shared by multiple tree traversals, reusing memory. +#[derive(Default, Clone)] +pub struct State { + freelist: Vec>, +} + +impl State { + /// Pop one empty buffer from the free-list. + pub fn pop_buf(&mut self) -> Vec { + match self.freelist.pop() { + None => Vec::new(), + Some(mut buf) => { + buf.clear(); + buf + } + } + } + + /// Make `buf` available for re-use with [`Self::pop_buf()`]. + pub fn push_buf(&mut self, buf: Vec) { + self.freelist.push(buf); + } +} + +pub(super) mod function { + use super::{Error, State}; + use crate::tree::visit::Action; + use crate::tree::Visit; + use gix_hash::ObjectId; + use gix_object::{FindExt, TreeRefIter}; + use std::borrow::BorrowMut; + + /// A depth-first traversal of the `root` tree, that preserves the natural order of a tree while immediately descending + /// into sub-trees. + /// + /// `state` can be passed to re-use memory during multiple invocations. 
+ pub fn depthfirst( + root: ObjectId, + mut state: StateMut, + objects: Find, + delegate: &mut V, + ) -> Result<(), Error> + where + Find: gix_object::Find, + StateMut: BorrowMut, + V: Visit, + { + enum Machine { + GetTree(ObjectId), + Iterate { + tree_buf: Vec, + byte_offset_to_next_entry: usize, + }, + } + + let state = state.borrow_mut(); + let mut stack = vec![Machine::GetTree(root)]; + 'outer: while let Some(item) = stack.pop() { + match item { + Machine::GetTree(id) => { + let mut buf = state.pop_buf(); + objects.find_tree_iter(&id, &mut buf)?; + stack.push(Machine::Iterate { + tree_buf: buf, + byte_offset_to_next_entry: 0, + }); + } + Machine::Iterate { + tree_buf: buf, + byte_offset_to_next_entry, + } => { + let mut iter = TreeRefIter::from_bytes(&buf[byte_offset_to_next_entry..]); + delegate.pop_back_tracked_path_and_set_current(); + while let Some(entry) = iter.next() { + let entry = entry?; + if entry.mode.is_tree() { + delegate.push_path_component(entry.filename); + let res = delegate.visit_tree(&entry); + delegate.pop_path_component(); + match res { + Action::Continue => {} + Action::Cancel => break 'outer, + Action::Skip => continue, + } + + delegate.push_back_tracked_path_component("".into()); + delegate.push_back_tracked_path_component(entry.filename); + let recurse_tree = Machine::GetTree(entry.oid.to_owned()); + let continue_at_next_entry = Machine::Iterate { + byte_offset_to_next_entry: iter.offset_to_next_entry(&buf), + tree_buf: buf, + }; + stack.push(continue_at_next_entry); + stack.push(recurse_tree); + continue 'outer; + } else { + delegate.push_path_component(entry.filename); + if let Action::Cancel = delegate.visit_nontree(&entry) { + break 'outer; + } + delegate.pop_path_component(); + } + } + state.push_buf(buf); + } + } + } + Ok(()) + } +} diff --git a/gix-traverse/src/tree/mod.rs b/gix-traverse/src/tree/mod.rs index 6a74ada28f3..85258f4301f 100644 --- a/gix-traverse/src/tree/mod.rs +++ b/gix-traverse/src/tree/mod.rs @@ -5,9 +5,19 @@ 
use gix_object::bstr::{BStr, BString}; /// A trait to allow responding to a traversal designed to observe all entries in a tree, recursively while keeping track of /// paths if desired. pub trait Visit { - /// Sets the full path path in front of the queue so future calls to push and pop components affect it instead. + /// Sets the full path in the back of the queue so future calls to push and pop components affect it instead. + /// + /// Note that the first call is made without an accompanying call to [`Self::push_back_tracked_path_component()`] + /// + /// This is used by the depth-first traversal of trees. + fn pop_back_tracked_path_and_set_current(&mut self); + /// Sets the full path in front of the queue so future calls to push and pop components affect it instead. + /// + /// This is used by the breadth-first traversal of trees. fn pop_front_tracked_path_and_set_current(&mut self); /// Append a `component` to the end of a path, which may be empty. + /// + /// If `component` is empty, store the current path. fn push_back_tracked_path_component(&mut self, component: &BStr); /// Append a `component` to the end of a path, which may be empty. 
fn push_path_component(&mut self, component: &BStr); @@ -66,4 +76,8 @@ pub mod recorder; /// pub mod breadthfirst; -pub use breadthfirst::impl_::traverse as breadthfirst; +pub use breadthfirst::function::breadthfirst; + +/// +pub mod depthfirst; +pub use depthfirst::function::depthfirst; diff --git a/gix-traverse/src/tree/recorder.rs b/gix-traverse/src/tree/recorder.rs index 6447ffb7133..029751295b8 100644 --- a/gix-traverse/src/tree/recorder.rs +++ b/gix-traverse/src/tree/recorder.rs @@ -62,6 +62,9 @@ impl Recorder { } fn push_element(&mut self, name: &BStr) { + if name.is_empty() { + return; + } if !self.path.is_empty() { self.path.push(b'/'); } @@ -92,6 +95,12 @@ impl Recorder { } impl Visit for Recorder { + fn pop_back_tracked_path_and_set_current(&mut self) { + if let Some(Location::Path) = self.location { + self.path = self.path_deque.pop_back().unwrap_or_default(); + } + } + fn pop_front_tracked_path_and_set_current(&mut self) { if let Some(Location::Path) = self.location { self.path = self diff --git a/gix-traverse/tests/Cargo.toml b/gix-traverse/tests/Cargo.toml index 24c49ed843f..83c3860b992 100644 --- a/gix-traverse/tests/Cargo.toml +++ b/gix-traverse/tests/Cargo.toml @@ -11,10 +11,11 @@ edition = "2021" rust-version = "1.65" [[test]] -name = "test" -path = "traverse.rs" +name = "traverse" +path = "traverse/main.rs" [dev-dependencies] +insta = "1.40.0" gix-traverse = { path = ".." 
} gix-testtools = { path = "../../tests/tools" } gix-odb = { path = "../../gix-odb" } diff --git a/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_trees_depthfirst.tar b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_trees_depthfirst.tar new file mode 100644 index 00000000000..4486330c739 Binary files /dev/null and b/gix-traverse/tests/fixtures/generated-archives/make_traversal_repo_for_trees_depthfirst.tar differ diff --git a/gix-traverse/tests/fixtures/make_traversal_repo_for_trees_depthfirst.sh b/gix-traverse/tests/fixtures/make_traversal_repo_for_trees_depthfirst.sh new file mode 100755 index 00000000000..ae1a493eea3 --- /dev/null +++ b/gix-traverse/tests/fixtures/make_traversal_repo_for_trees_depthfirst.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init -q + +git checkout -q -b main +touch a b c +mkdir d e f +touch d/a e/b f/c f/z +mkdir f/ISSUE_TEMPLATE +touch f/ISSUE_TEMPLATE/x f/FUNDING.yml f/dependabot.yml + +git add . 
+git commit -q -m c1 diff --git a/gix-traverse/tests/commit/mod.rs b/gix-traverse/tests/traverse/commit/mod.rs similarity index 100% rename from gix-traverse/tests/commit/mod.rs rename to gix-traverse/tests/traverse/commit/mod.rs diff --git a/gix-traverse/tests/commit/simple.rs b/gix-traverse/tests/traverse/commit/simple.rs similarity index 100% rename from gix-traverse/tests/commit/simple.rs rename to gix-traverse/tests/traverse/commit/simple.rs diff --git a/gix-traverse/tests/commit/topo.rs b/gix-traverse/tests/traverse/commit/topo.rs similarity index 100% rename from gix-traverse/tests/commit/topo.rs rename to gix-traverse/tests/traverse/commit/topo.rs diff --git a/gix-traverse/tests/traverse.rs b/gix-traverse/tests/traverse/main.rs similarity index 100% rename from gix-traverse/tests/traverse.rs rename to gix-traverse/tests/traverse/main.rs diff --git a/gix-traverse/tests/traverse/tree.rs b/gix-traverse/tests/traverse/tree.rs new file mode 100644 index 00000000000..048ade4a205 --- /dev/null +++ b/gix-traverse/tests/traverse/tree.rs @@ -0,0 +1,378 @@ +fn db() -> crate::Result { + named_db("make_traversal_repo_for_trees.sh") +} + +fn named_db(name: &str) -> crate::Result { + let dir = gix_testtools::scripted_fixture_read_only_standalone(name)?; + let db = gix_odb::at(dir.join(".git").join("objects"))?; + Ok(db) +} + +mod depthfirst { + use crate::hex_to_id; + use crate::tree::{db, named_db}; + use gix_object::FindExt; + use gix_traverse::tree; + use gix_traverse::tree::recorder::Location; + + #[test] + fn full_path_and_filename() -> crate::Result { + let db = db()?; + let mut state = gix_traverse::tree::depthfirst::State::default(); + let mut buf = state.pop_buf(); + let mut recorder = tree::Recorder::default(); + let tree = db + .find_commit(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? 
+ .tree(); + + gix_traverse::tree::depthfirst(tree, &mut state, &db, &mut recorder)?; + insta::assert_debug_snapshot!(recorder.records, @r#" + [ + Entry { + mode: EntryMode(0o100644), + filepath: "a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "b", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "d", + oid: Sha1(496d6428b9cf92981dc9495211e6e1120fb6f2ba), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "d/a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "e", + oid: Sha1(4277b6e69d25e5efa77c455340557b384a4c018a), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "e/b", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "f", + oid: Sha1(70fb16fc77b03e16acb4a5b1a6caf79ba302919a), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "f/d", + oid: Sha1(5805b676e247eb9a8046ad0c4d249cd2fb2513df), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/d/x", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/z", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + + recorder.records.clear(); + recorder = recorder.track_location(Some(Location::FileName)); + gix_traverse::tree::depthfirst(tree, state, &db, &mut recorder)?; + insta::assert_debug_snapshot!(recorder.records, @r#" + [ + Entry { + mode: EntryMode(0o100644), + filepath: "a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "b", + oid: 
Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "d", + oid: Sha1(496d6428b9cf92981dc9495211e6e1120fb6f2ba), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "e", + oid: Sha1(4277b6e69d25e5efa77c455340557b384a4c018a), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "b", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "f", + oid: Sha1(70fb16fc77b03e16acb4a5b1a6caf79ba302919a), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o40000), + filepath: "d", + oid: Sha1(5805b676e247eb9a8046ad0c4d249cd2fb2513df), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "x", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "z", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + Ok(()) + } + + #[test] + fn more_difficult_fixture() -> crate::Result { + let db = named_db("make_traversal_repo_for_trees_depthfirst.sh")?; + let mut state = gix_traverse::tree::depthfirst::State::default(); + let mut buf = state.pop_buf(); + let mut recorder = tree::Recorder::default(); + let tree = db + .find_commit(&hex_to_id("fe63a8a9fb7c27c089835aae92cbda675523803a"), &mut buf)? 
+ .tree(); + + gix_traverse::tree::depthfirst(tree, &mut state, &db, &mut recorder)?; + insta::assert_debug_snapshot!(recorder.records.into_iter().filter(|e| e.mode.is_no_tree()).collect::>(), @r#" + [ + Entry { + mode: EntryMode(0o100644), + filepath: "a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "b", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "d/a", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "e/b", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/FUNDING.yml", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/ISSUE_TEMPLATE/x", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/c", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/dependabot.yml", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + Entry { + mode: EntryMode(0o100644), + filepath: "f/z", + oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + }, + ] + "#); + Ok(()) + } +} + +mod breadthfirst { + use crate::hex_to_id; + use crate::tree::db; + use gix_object::bstr::BString; + use gix_odb::pack::FindExt; + use gix_traverse::tree; + use gix_traverse::tree::recorder::Location; + + #[test] + fn full_path() -> crate::Result { + let db = db()?; + let mut buf = Vec::new(); + let mut buf2 = Vec::new(); + let mut commit = db + .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? + .0; + // Full paths - that's the default. 
+ let mut recorder = tree::Recorder::default(); + gix_traverse::tree::breadthfirst( + db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? + .0, + tree::breadthfirst::State::default(), + &db, + &mut recorder, + )?; + + use gix_object::tree::EntryKind::*; + use gix_traverse::tree::recorder::Entry; + assert_eq!( + recorder.records, + vec![ + Entry { + mode: Blob.into(), + filepath: "a".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Blob.into(), + filepath: "b".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Blob.into(), + filepath: "c".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Tree.into(), + filepath: "d".into(), + oid: hex_to_id("496d6428b9cf92981dc9495211e6e1120fb6f2ba") + }, + Entry { + mode: Tree.into(), + filepath: "e".into(), + oid: hex_to_id("4277b6e69d25e5efa77c455340557b384a4c018a") + }, + Entry { + mode: Tree.into(), + filepath: "f".into(), + oid: hex_to_id("70fb16fc77b03e16acb4a5b1a6caf79ba302919a") + }, + Entry { + mode: Blob.into(), + filepath: "d/a".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Blob.into(), + filepath: "e/b".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Blob.into(), + filepath: "f/c".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Tree.into(), + filepath: "f/d".into(), + oid: hex_to_id("5805b676e247eb9a8046ad0c4d249cd2fb2513df") + }, + Entry { + mode: Blob.into(), + filepath: "f/z".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + }, + Entry { + mode: Blob.into(), + filepath: "f/d/x".into(), + oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + } + ] + ); + Ok(()) + } + + #[test] + fn filename_only() -> crate::Result<()> { + let db = db()?; + let mut buf = Vec::new(); + let mut buf2 = Vec::new(); + 
let mut commit = db + .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? + .0; + let mut recorder = tree::Recorder::default().track_location(Some(Location::FileName)); + gix_traverse::tree::breadthfirst( + db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? + .0, + tree::breadthfirst::State::default(), + &db, + &mut recorder, + )?; + + assert_eq!( + recorder.records.into_iter().map(|e| e.filepath).collect::>(), + ["a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "z", "x"] + .into_iter() + .map(BString::from) + .collect::>() + ); + Ok(()) + } + + #[test] + fn no_location() -> crate::Result<()> { + let db = db()?; + let mut buf = Vec::new(); + let mut buf2 = Vec::new(); + let mut commit = db + .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? + .0; + let mut recorder = tree::Recorder::default().track_location(None); + gix_traverse::tree::breadthfirst( + db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? 
+ .0, + tree::breadthfirst::State::default(), + &db, + &mut recorder, + )?; + + for path in recorder.records.into_iter().map(|e| e.filepath) { + assert_eq!(path, "", "path should be empty as it's not tracked at all"); + } + Ok(()) + } +} diff --git a/gix-traverse/tests/tree/mod.rs b/gix-traverse/tests/tree/mod.rs deleted file mode 100644 index 0200abf1690..00000000000 --- a/gix-traverse/tests/tree/mod.rs +++ /dev/null @@ -1,149 +0,0 @@ -use gix_object::bstr::BString; -use gix_odb::pack::FindExt; -use gix_traverse::{tree, tree::recorder::Location}; - -use crate::hex_to_id; - -fn db() -> crate::Result { - let dir = gix_testtools::scripted_fixture_read_only_standalone("make_traversal_repo_for_trees.sh")?; - let db = gix_odb::at(dir.join(".git").join("objects"))?; - Ok(db) -} - -#[test] -fn breadth_first_full_path() -> crate::Result<()> { - let db = db()?; - let mut buf = Vec::new(); - let mut buf2 = Vec::new(); - let mut commit = db - .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? - .0; - // Full paths - that's the default. - let mut recorder = tree::Recorder::default(); - gix_traverse::tree::breadthfirst( - db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? 
- .0, - tree::breadthfirst::State::default(), - &db, - &mut recorder, - )?; - - use gix_object::tree::EntryKind::*; - use gix_traverse::tree::recorder::Entry; - assert_eq!( - recorder.records, - vec![ - Entry { - mode: Blob.into(), - filepath: "a".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Blob.into(), - filepath: "b".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Blob.into(), - filepath: "c".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Tree.into(), - filepath: "d".into(), - oid: hex_to_id("496d6428b9cf92981dc9495211e6e1120fb6f2ba") - }, - Entry { - mode: Tree.into(), - filepath: "e".into(), - oid: hex_to_id("4277b6e69d25e5efa77c455340557b384a4c018a") - }, - Entry { - mode: Tree.into(), - filepath: "f".into(), - oid: hex_to_id("70fb16fc77b03e16acb4a5b1a6caf79ba302919a") - }, - Entry { - mode: Blob.into(), - filepath: "d/a".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Blob.into(), - filepath: "e/b".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Blob.into(), - filepath: "f/c".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Tree.into(), - filepath: "f/d".into(), - oid: hex_to_id("5805b676e247eb9a8046ad0c4d249cd2fb2513df") - }, - Entry { - mode: Blob.into(), - filepath: "f/z".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - }, - Entry { - mode: Blob.into(), - filepath: "f/d/x".into(), - oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - } - ] - ); - Ok(()) -} - -#[test] -fn breadth_first_filename_only() -> crate::Result<()> { - let db = db()?; - let mut buf = Vec::new(); - let mut buf2 = Vec::new(); - let mut commit = db - .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? 
- .0; - let mut recorder = tree::Recorder::default().track_location(Some(Location::FileName)); - gix_traverse::tree::breadthfirst( - db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? - .0, - tree::breadthfirst::State::default(), - &db, - &mut recorder, - )?; - - assert_eq!( - recorder.records.into_iter().map(|e| e.filepath).collect::>(), - ["a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "z", "x"] - .into_iter() - .map(BString::from) - .collect::>() - ); - Ok(()) -} - -#[test] -fn breadth_first_no_location() -> crate::Result<()> { - let db = db()?; - let mut buf = Vec::new(); - let mut buf2 = Vec::new(); - let mut commit = db - .find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)? - .0; - let mut recorder = tree::Recorder::default().track_location(None); - gix_traverse::tree::breadthfirst( - db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)? - .0, - tree::breadthfirst::State::default(), - &db, - &mut recorder, - )?; - - for path in recorder.records.into_iter().map(|e| e.filepath) { - assert_eq!(path, "", "path should be empty as it's not tracked at all"); - } - Ok(()) -} diff --git a/gix-worktree-stream/src/from_tree/traverse.rs b/gix-worktree-stream/src/from_tree/traverse.rs index 8acb5bf6abd..3daa0d534ed 100644 --- a/gix-worktree-stream/src/from_tree/traverse.rs +++ b/gix-worktree-stream/src/from_tree/traverse.rs @@ -39,6 +39,9 @@ where } fn push_element(&mut self, name: &BStr) { + if name.is_empty() { + return; + } if !self.path.is_empty() { self.path.push(b'/'); } @@ -105,6 +108,10 @@ where AttributesFn: FnMut(&BStr, gix_object::tree::EntryMode, &mut gix_attributes::search::Outcome) -> Result<(), Error> + 'static, { + fn pop_back_tracked_path_and_set_current(&mut self) { + self.path = self.path_deque.pop_back().unwrap_or_default(); + } + fn pop_front_tracked_path_and_set_current(&mut self) { self.path = self .path_deque diff --git a/gix/Cargo.toml 
b/gix/Cargo.toml index c0578080ff9..61280a84054 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -86,7 +86,7 @@ comfort = [ command = ["dep:gix-command"] ## Obtain information similar to `git status`. -status = ["gix-status", "dirwalk", "index", "blob-diff"] +status = ["gix-status", "dirwalk", "index", "blob-diff", "gix-diff/index"] ## Utilities for interrupting computations and cleaning up tempfiles. interrupt = ["dep:signal-hook", "gix-tempfile/signals", "dep:parking_lot"] @@ -374,7 +374,7 @@ gix-command = { version = "^0.4.0", path = "../gix-command", optional = true } gix-worktree-stream = { version = "^0.18.0", path = "../gix-worktree-stream", optional = true } gix-archive = { version = "^0.18.0", path = "../gix-archive", default-features = false, optional = true } -gix-blame = { version= "^0.0.0", path ="../gix-blame", optional = true } +gix-blame = { version = "^0.0.0", path = "../gix-blame", optional = true } # For communication with remotes gix-protocol = { version = "^0.47.0", path = "../gix-protocol" } diff --git a/gix/examples/stats.rs b/gix/examples/stats.rs index 62bed720228..7c3680a5949 100644 --- a/gix/examples/stats.rs +++ b/gix/examples/stats.rs @@ -39,7 +39,6 @@ fn main() -> Result<(), Box> { let mut delegate = visit::Tree::new(repo.clone()); tree.traverse().breadthfirst(&mut delegate)?; - let _files = tree.traverse().breadthfirst.files()?; println!("num trees: {}", delegate.num_trees); println!("num blobs: {}", delegate.num_blobs); @@ -105,6 +104,8 @@ mod visit { } } impl gix_traverse::tree::Visit for Tree { + fn pop_back_tracked_path_and_set_current(&mut self) {} + fn pop_front_tracked_path_and_set_current(&mut self) {} fn push_back_tracked_path_component(&mut self, _component: &BStr) {} diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 46b842cdffb..4022b6430da 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -181,7 +181,7 @@ impl Cache { .user_agent .get_or_init(|| { 
self.resolved - .string(Gitoxide::USER_AGENT.logical_name().as_str()) + .string(&Gitoxide::USER_AGENT) .map_or_else(|| crate::env::agent().into(), |s| s.to_string()) }) .to_owned(); diff --git a/gix/src/config/snapshot/access.rs b/gix/src/config/snapshot/access.rs index 218417bc23a..6b753ab050e 100644 --- a/gix/src/config/snapshot/access.rs +++ b/gix/src/config/snapshot/access.rs @@ -22,13 +22,13 @@ impl<'repo> Snapshot<'repo> { /// For a non-degenerating version, use [`try_boolean(…)`][Self::try_boolean()]. /// /// Note that this method takes the most recent value at `key` even if it is from a file with reduced trust. - pub fn boolean<'a>(&self, key: impl Into<&'a BStr>) -> Option { + pub fn boolean(&self, key: impl gix_config::AsKey) -> Option { self.try_boolean(key).and_then(Result::ok) } /// Like [`boolean()`][Self::boolean()], but it will report an error if the value couldn't be interpreted as boolean. - pub fn try_boolean<'a>(&self, key: impl Into<&'a BStr>) -> Option> { - self.repo.config.resolved.boolean(key.into()) + pub fn try_boolean(&self, key: impl gix_config::AsKey) -> Option> { + self.repo.config.resolved.boolean(key) } /// Return the resolved integer at `key`, or `None` if there is no such value or if the value can't be interpreted as @@ -37,40 +37,40 @@ impl<'repo> Snapshot<'repo> { /// For a non-degenerating version, use [`try_integer(…)`][Self::try_integer()]. /// /// Note that this method takes the most recent value at `key` even if it is from a file with reduced trust. - pub fn integer<'a>(&self, key: impl Into<&'a BStr>) -> Option { + pub fn integer(&self, key: impl gix_config::AsKey) -> Option { self.try_integer(key).and_then(Result::ok) } /// Like [`integer()`][Self::integer()], but it will report an error if the value couldn't be interpreted as boolean. 
- pub fn try_integer<'a>(&self, key: impl Into<&'a BStr>) -> Option> { - self.repo.config.resolved.integer(key.into()) + pub fn try_integer(&self, key: impl gix_config::AsKey) -> Option> { + self.repo.config.resolved.integer(key) } /// Return the string at `key`, or `None` if there is no such value. /// /// Note that this method takes the most recent value at `key` even if it is from a file with reduced trust. - pub fn string<'a>(&self, key: impl Into<&'a BStr>) -> Option> { - self.repo.config.resolved.string(key.into()) + pub fn string(&self, key: impl gix_config::AsKey) -> Option> { + self.repo.config.resolved.string(key) } /// Return the trusted and fully interpolated path at `key`, or `None` if there is no such value /// or if no value was found in a trusted file. /// An error occurs if the path could not be interpolated to its final value. - pub fn trusted_path<'a>( + pub fn trusted_path( &self, - key: impl Into<&'a BStr>, + key: impl gix_config::AsKey, ) -> Option, gix_config::path::interpolate::Error>> { - self.repo.config.trusted_file_path(key.into()) + self.repo.config.trusted_file_path(key) } /// Return the trusted string at `key` for launching using [command::prepare()](gix_command::prepare()), /// or `None` if there is no such value or if no value was found in a trusted file. 
- pub fn trusted_program<'a>(&self, key: impl Into<&'a BStr>) -> Option> { + pub fn trusted_program(&self, key: impl gix_config::AsKey) -> Option> { let value = self .repo .config .resolved - .string_filter(key.into(), &mut self.repo.config.filter_config_section.clone())?; + .string_filter(key, &mut self.repo.config.filter_config_section.clone())?; Some(match gix_path::from_bstr(value) { Cow::Borrowed(v) => Cow::Borrowed(v.as_os_str()), Cow::Owned(v) => Cow::Owned(v.into_os_string()), diff --git a/gix/src/config/tree/sections/status.rs b/gix/src/config/tree/sections/status.rs index f60600e214b..28038325785 100644 --- a/gix/src/config/tree/sections/status.rs +++ b/gix/src/config/tree/sections/status.rs @@ -9,6 +9,16 @@ impl Status { &config::Tree::STATUS, validate::ShowUntrackedFiles, ); + /// The `status.renameLimit` key. + pub const RENAME_LIMIT: keys::UnsignedInteger = keys::UnsignedInteger::new_unsigned_integer( + "renameLimit", + &config::Tree::MERGE, + ) + .with_note( + "The limit is actually squared, so 1000 stands for up to 1 million diffs if fuzzy rename tracking is enabled", + ); + /// The `status.renames` key. + pub const RENAMES: super::diff::Renames = super::diff::Renames::new_renames("renames", &config::Tree::MERGE); } /// The `status.showUntrackedFiles` key. 
@@ -41,7 +51,7 @@ impl Section for Status { } fn keys(&self) -> &[&dyn Key] { - &[&Self::SHOW_UNTRACKED_FILES] + &[&Self::SHOW_UNTRACKED_FILES, &Self::RENAMES, &Self::RENAME_LIMIT] } } diff --git a/gix/src/dirwalk/iter.rs b/gix/src/dirwalk/iter.rs index 6bfde0ef8f7..0f31dd30b72 100644 --- a/gix/src/dirwalk/iter.rs +++ b/gix/src/dirwalk/iter.rs @@ -160,7 +160,12 @@ impl Iterator for Iter { #[cfg(feature = "parallel")] impl Drop for Iter { fn drop(&mut self) { - crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt); + crate::util::parallel_iter_drop( + self.rx_and_join + .take() + .map(|(rx, handle)| (rx, handle, None::>)), + &self.should_interrupt, + ); } } diff --git a/gix/src/object/tree/traverse.rs b/gix/src/object/tree/traverse.rs index 78159016abd..3c66ac56953 100644 --- a/gix/src/object/tree/traverse.rs +++ b/gix/src/object/tree/traverse.rs @@ -15,6 +15,8 @@ impl<'repo> Tree<'repo> { pub struct Platform<'a, 'repo> { root: &'a Tree<'repo>, /// Provides easy access to presets for common breadth-first traversal. + // TODO: remove this - it's a bit too much of a fixed function, or go all in once it's clear it's needed, + // but probably with depth-first. pub breadthfirst: BreadthFirstPresets<'a, 'repo>, } @@ -38,12 +40,12 @@ impl BreadthFirstPresets<'_, '_> { } impl Platform<'_, '_> { - /// Start a breadth-first, recursive traversal using `delegate`, for which a [`Recorder`][gix_traverse::tree::Recorder] can be used to get started. + /// Start a breadth-first, recursive traversal using `delegate`, for which a [`Recorder`](gix_traverse::tree::Recorder) can be used to get started. /// /// # Note /// - /// - Results are returned in sort order according to tree-entry sorting rules, one level at a time. - /// - for obtaining the direct children of the tree, use [.iter()][crate::Tree::iter()] instead. + /// - Results are returned in sort order as per tree-sorting rules, files first, then directories, one level at a time. 
+ /// - for obtaining the direct children of the tree, use [Tree::iter()] instead. pub fn breadthfirst(&self, delegate: &mut V) -> Result<(), gix_traverse::tree::breadthfirst::Error> where V: gix_traverse::tree::Visit, @@ -52,4 +54,17 @@ impl Platform<'_, '_> { let state = gix_traverse::tree::breadthfirst::State::default(); gix_traverse::tree::breadthfirst(root, state, &self.root.repo.objects, delegate) } + + /// Start a depth-first, recursive traversal using `delegate`, for which a [`Recorder`](gix_traverse::tree::Recorder) can be used to get started. + /// + /// # Note + /// + /// For obtaining the direct children of the tree, use [Tree::iter()] instead. + pub fn depthfirst(&self, delegate: &mut V) -> Result<(), gix_traverse::tree::breadthfirst::Error> + where + V: gix_traverse::tree::Visit, + { + let state = gix_traverse::tree::depthfirst::State::default(); + gix_traverse::tree::depthfirst(self.root.id, state, &self.root.repo.objects, delegate) + } } diff --git a/gix/src/pathspec.rs b/gix/src/pathspec.rs index be69ad0321e..78316c1037b 100644 --- a/gix/src/pathspec.rs +++ b/gix/src/pathspec.rs @@ -202,7 +202,7 @@ impl PathspecDetached { } } -fn is_dir_to_mode(is_dir: bool) -> gix_index::entry::Mode { +pub(crate) fn is_dir_to_mode(is_dir: bool) -> gix_index::entry::Mode { if is_dir { gix_index::entry::Mode::DIR } else { diff --git a/gix/src/remote/connection/fetch/receive_pack.rs b/gix/src/remote/connection/fetch/receive_pack.rs index ef328da2d38..e9e932d44a9 100644 --- a/gix/src/remote/connection/fetch/receive_pack.rs +++ b/gix/src/remote/connection/fetch/receive_pack.rs @@ -1,7 +1,7 @@ use crate::{ config::{ cache::util::ApplyLeniency, - tree::{Clone, Fetch, Key}, + tree::{Clone, Fetch}, }, remote, remote::{ @@ -117,7 +117,7 @@ where let negotiator = repo .config .resolved - .string(Fetch::NEGOTIATION_ALGORITHM.logical_name().as_str()) + .string(Fetch::NEGOTIATION_ALGORITHM) .map(|n| Fetch::NEGOTIATION_ALGORITHM.try_into_negotiation_algorithm(n)) .transpose() 
.with_leniency(repo.config.lenient_config)? diff --git a/gix/src/remote/url/scheme_permission.rs b/gix/src/remote/url/scheme_permission.rs index 7709537fed7..47fbd351b8a 100644 --- a/gix/src/remote/url/scheme_permission.rs +++ b/gix/src/remote/url/scheme_permission.rs @@ -3,7 +3,7 @@ use std::{borrow::Cow, collections::BTreeMap}; use crate::{ bstr::{BStr, BString, ByteSlice}, config, - config::tree::{gitoxide, Key, Protocol}, + config::tree::{gitoxide, Protocol}, }; /// All allowed values of the `protocol.allow` key. @@ -91,7 +91,7 @@ impl SchemePermission { let user_allowed = saw_user.then(|| { config - .string_filter(gitoxide::Allow::PROTOCOL_FROM_USER.logical_name().as_str(), &mut filter) + .string_filter(gitoxide::Allow::PROTOCOL_FROM_USER, &mut filter) .map_or(true, |val| val.as_ref() == "1") }); Ok(SchemePermission { diff --git a/gix/src/repository/identity.rs b/gix/src/repository/identity.rs index 5b833991de9..a9b3d7899b3 100644 --- a/gix/src/repository/identity.rs +++ b/gix/src/repository/identity.rs @@ -149,7 +149,7 @@ impl Personas { user_email = user_email.or_else(|| { config - .string(gitoxide::User::EMAIL_FALLBACK.logical_name().as_str()) + .string(gitoxide::User::EMAIL_FALLBACK) .map(std::borrow::Cow::into_owned) }); Personas { diff --git a/gix/src/repository/mailmap.rs b/gix/src/repository/mailmap.rs index 9b5b47d78a8..d19d567adeb 100644 --- a/gix/src/repository/mailmap.rs +++ b/gix/src/repository/mailmap.rs @@ -1,4 +1,4 @@ -use crate::config::tree::{Key, Mailmap}; +use crate::config::tree::Mailmap; use crate::Id; impl crate::Repository { @@ -68,7 +68,7 @@ impl crate::Repository { let configured_path = self .config_snapshot() - .trusted_path(Mailmap::FILE.logical_name().as_str()) + .trusted_path(&Mailmap::FILE) .and_then(|res| res.map_err(|e| err.get_or_insert(e.into())).ok()); if let Some(mut file) = diff --git a/gix/src/repository/shallow.rs b/gix/src/repository/shallow.rs index 322c0c315e8..90947a25d9c 100644 --- a/gix/src/repository/shallow.rs 
+++ b/gix/src/repository/shallow.rs @@ -1,9 +1,6 @@ use std::{borrow::Cow, path::PathBuf}; -use crate::{ - config::tree::{gitoxide, Key}, - Repository, -}; +use crate::{config::tree::gitoxide, Repository}; impl Repository { /// Return `true` if the repository is a shallow clone, i.e. contains history only up to a certain depth. @@ -36,10 +33,7 @@ impl Repository { let shallow_name = self .config .resolved - .string_filter( - gitoxide::Core::SHALLOW_FILE.logical_name().as_str(), - &mut self.filter_config_section(), - ) + .string_filter(gitoxide::Core::SHALLOW_FILE, &mut self.filter_config_section()) .unwrap_or_else(|| Cow::Borrowed("shallow".into())); self.common_dir().join(gix_path::from_bstr(shallow_name)) } diff --git a/gix/src/status/index_worktree.rs b/gix/src/status/index_worktree.rs index de20c5fd119..3dccb18ec28 100644 --- a/gix/src/status/index_worktree.rs +++ b/gix/src/status/index_worktree.rs @@ -110,16 +110,8 @@ impl Repository { crate::worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, None, )?; - let pathspec = crate::Pathspec::new( - self, - options - .dirwalk_options - .as_ref() - .map_or(false, |opts| opts.empty_patterns_match_prefix), - patterns, - true, /* inherit ignore case */ - || Ok(attrs_and_excludes.clone()), - )?; + let pathspec = + self.index_worktree_status_pathspec::(patterns, index, options.dirwalk_options.as_ref())?; let cwd = self.current_dir(); let git_dir_realpath = crate::path::realpath_opts(self.git_dir(), cwd, crate::path::realpath::MAX_SYMLINKS)?; @@ -167,6 +159,31 @@ impl Repository { )?; Ok(out) } + + pub(super) fn index_worktree_status_pathspec( + &self, + patterns: impl IntoIterator>, + index: &gix_index::State, + options: Option<&crate::dirwalk::Options>, + ) -> Result, E> + where + E: From + From, + { + let empty_patterns_match_prefix = options.map_or(false, |opts| opts.empty_patterns_match_prefix); + let attrs_and_excludes = self.attributes( + index, + 
crate::worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + crate::worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + None, + )?; + Ok(crate::Pathspec::new( + self, + empty_patterns_match_prefix, + patterns, + true, /* inherit ignore case */ + move || Ok(attrs_and_excludes.inner), + )?) + } } /// An implementation of a trait to use with [`Repository::index_worktree_status()`] to compute the submodule status @@ -272,7 +289,7 @@ mod submodule_status { /// /// ### Index Changes /// -/// Changes to the index are collected and it's possible to write the index back using [iter::Outcome::write_changes()]. +/// Changes to the index are collected and it's possible to write the index back using [Outcome::write_changes()](crate::status::Outcome). /// Note that these changes are not observable, they will always be kept. /// /// ### Parallel Operation @@ -287,124 +304,110 @@ mod submodule_status { /// to interrupt unless [`status::Platform::should_interrupt_*()`](crate::status::Platform::should_interrupt_shared()) was /// configured. pub struct Iter { - #[cfg(feature = "parallel")] - #[allow(clippy::type_complexity)] - rx_and_join: Option<( - std::sync::mpsc::Receiver, - std::thread::JoinHandle>, - )>, - #[cfg(feature = "parallel")] - should_interrupt: crate::status::OwnedOrStaticAtomicBool, - /// Without parallelization, the iterator has to buffer all changes in advance. - #[cfg(not(feature = "parallel"))] - items: std::vec::IntoIter, - /// The outcome of the operation, only available once the operation has ended. - out: Option, - /// The set of `(entry_index, change)` we extracted in order to potentially write back the index with the changes applied. - changes: Vec<(usize, iter::ApplyChange)>, + inner: crate::status::Iter, +} + +/// The item produced by the iterator +#[derive(Clone, PartialEq, Debug)] +pub enum Item { + /// A tracked file was modified, and index-specific information is passed. 
+ Modification { + /// The entry with modifications. + entry: gix_index::Entry, + /// The index of the `entry` for lookup in [`gix_index::State::entries()`] - useful to look at neighbors. + entry_index: usize, + /// The repository-relative path of the entry. + rela_path: BString, + /// The computed status of the entry. + status: gix_status::index_as_worktree::EntryStatus<(), crate::submodule::Status>, + }, + /// An entry returned by the directory walk, without any relation to the index. + /// + /// This can happen if ignored files are returned as well, or if rename-tracking is disabled. + DirectoryContents { + /// The entry found during the disk traversal. + entry: gix_dir::Entry, + /// `collapsed_directory_status` is `Some(dir_status)` if this `entry` was part of a directory with the given + /// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was + /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). + /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). + collapsed_directory_status: Option, + }, + /// The rewrite tracking discovered a match between a deleted and added file, and considers them equal enough, + /// depending on the tracker settings. + /// + /// Note that the source of the rewrite is always the index as it detects the absence of entries, something that + /// can't be done during a directory walk. + Rewrite { + /// The source of the rewrite operation. + source: RewriteSource, + /// The untracked entry found during the disk traversal, the destination of the rewrite. + /// + /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the destination of the rewrite, and the current + /// location of the entry. 
+ dirwalk_entry: gix_dir::Entry, + /// `collapsed_directory_status` is `Some(dir_status)` if this `dirwalk_entry` was part of a directory with the given + /// `dir_status` that wasn't the same as the one of `dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was + /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). + /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). + dirwalk_entry_collapsed_directory_status: Option, + /// The object id after the rename, specifically hashed in order to determine equality. + dirwalk_entry_id: gix_hash::ObjectId, + /// It's `None` if the 'source.id' is equal to `dirwalk_entry_id`, as identity made an actual diff computation unnecessary. + /// Otherwise, and if enabled, it's `Some(stats)` to indicate how similar both entries were. + diff: Option, + /// If true, this rewrite is created by copy, and 'source.id' is pointing to its source. + /// Otherwise, it's a rename, and 'source.id' points to a deleted object, + /// as renames are tracked as deletions and additions of the same or similar content. + copy: bool, + }, +} + +/// Either an index entry for renames or another directory entry in case of copies. +#[derive(Clone, PartialEq, Debug)] +pub enum RewriteSource { + /// The source originates in the index and is detected as missing in the working tree. + /// This can also happen for copies. + RewriteFromIndex { + /// The entry that is the source of the rewrite, which means it was removed on disk, + /// equivalent to [Change::Removed](gix_status::index_as_worktree::Change::Removed). + /// + /// Note that the [entry-id](gix_index::Entry::id) is the content-id of the source of the rewrite. + source_entry: gix_index::Entry, + /// The index of the `source_entry` for lookup in [`gix_index::State::entries()`] - useful to look at neighbors. 
+ source_entry_index: usize, + /// The repository-relative path of the `source_entry`. + source_rela_path: BString, + /// The computed status of the `source_entry`. + source_status: gix_status::index_as_worktree::EntryStatus<(), crate::submodule::Status>, + }, + /// This source originates in the directory tree and is always the source of copies. + CopyFromDirectoryEntry { + /// The source of the copy operation, which is also an entry of the directory walk. + /// + /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the source of the rewrite. + source_dirwalk_entry: gix_dir::Entry, + /// `collapsed_directory_status` is `Some(dir_status)` if this `source_dirwalk_entry` was part of a directory with the given + /// `dir_status` that wasn't the same as the one of `source_dirwalk_entry` and + /// if [gix_dir::walk::Options::emit_collapsed] was [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). + /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). + source_dirwalk_entry_collapsed_directory_status: Option, + /// The object id as it would appear if the entry was written to the object database. + /// It's the same as [`dirwalk_entry_id`](Item::Rewrite), or `diff` is `Some(_)` to indicate that the copy + /// was determined by similarity, not by content equality. 
+ source_dirwalk_entry_id: gix_hash::ObjectId, + }, } /// pub mod iter { use crate::bstr::{BStr, BString}; - use crate::config::cache::util::ApplyLeniencyDefault; - use crate::status::index_worktree::{iter, BuiltinSubmoduleStatus}; use crate::status::{index_worktree, Platform}; - use crate::worktree::IndexPersistedOrInMemory; use gix_status::index_as_worktree::{Change, EntryStatus}; + use super::{Item, RewriteSource}; pub use gix_status::index_as_worktree_with_renames::Summary; - pub(super) enum ApplyChange { - SetSizeToZero, - NewStat(crate::index::entry::Stat), - } - - /// The data the thread sends over to the receiving iterator. - pub struct Outcome { - /// The outcome of the index-to-worktree comparison operation. - pub index_worktree: gix_status::index_as_worktree_with_renames::Outcome, - /// The index that was used for the operation. - pub index: crate::worktree::IndexPersistedOrInMemory, - skip_hash: bool, - changes: Option>, - } - - impl Outcome { - /// Returns `true` if the index has received currently unapplied changes that *should* be written back. - /// - /// If they are not written back, subsequent `status` operations will take longer to complete, whereas the - /// additional work can be prevented by writing the changes back to the index. - pub fn has_changes(&self) -> bool { - self.changes.as_ref().map_or(false, |changes| !changes.is_empty()) - } - - /// Write the changes if there are any back to the index file. - /// This can only be done once as the changes are consumed in the process, if there were any. 
- pub fn write_changes(&mut self) -> Option> { - let _span = gix_features::trace::coarse!("gix::status::index_worktree::iter::Outcome::write_changes()"); - let changes = self.changes.take()?; - let mut index = match &self.index { - IndexPersistedOrInMemory::Persisted(persisted) => (***persisted).clone(), - IndexPersistedOrInMemory::InMemory(index) => index.clone(), - }; - - let entries = index.entries_mut(); - for (entry_index, change) in changes { - let entry = &mut entries[entry_index]; - match change { - ApplyChange::SetSizeToZero => { - entry.stat.size = 0; - } - ApplyChange::NewStat(new_stat) => { - entry.stat = new_stat; - } - } - } - - Some(index.write(crate::index::write::Options { - extensions: Default::default(), - skip_hash: self.skip_hash, - })) - } - } - - /// Either an index entry for renames or another directory entry in case of copies. - #[derive(Clone, PartialEq, Debug)] - pub enum RewriteSource { - /// The source originates in the index and is detected as missing in the working tree. - /// This can also happen for copies. - RewriteFromIndex { - /// The entry that is the source of the rewrite, which means it was removed on disk, - /// equivalent to [Change::Removed]. - /// - /// Note that the [entry-id](gix_index::Entry::id) is the content-id of the source of the rewrite. - source_entry: gix_index::Entry, - /// The index of the `source_entry` for lookup in [`gix_index::State::entries()`] - useful to look at neighbors. - source_entry_index: usize, - /// The repository-relative path of the `source_entry`. - source_rela_path: BString, - /// The computed status of the `source_entry`. - source_status: gix_status::index_as_worktree::EntryStatus<(), crate::submodule::Status>, - }, - /// This source originates in the directory tree and is always the source of copies. - CopyFromDirectoryEntry { - /// The source of the copy operation, which is also an entry of the directory walk. 
- /// - /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the source of the rewrite. - source_dirwalk_entry: gix_dir::Entry, - /// `collapsed_directory_status` is `Some(dir_status)` if this `source_dirwalk_entry` was part of a directory with the given - /// `dir_status` that wasn't the same as the one of `source_dirwalk_entry` and - /// if [gix_dir::walk::Options::emit_collapsed] was [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). - /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). - source_dirwalk_entry_collapsed_directory_status: Option, - /// The object id as it would appear if the entry was written to the object database. - /// It's the same as [`dirwalk_entry_id`](Item::Rewrite), or `diff` is `Some(_)` to indicate that the copy - /// was determined by similarity, not by content equality. - source_dirwalk_entry_id: gix_hash::ObjectId, - }, - } - /// Access impl RewriteSource { /// The repository-relative path of this source. @@ -448,62 +451,6 @@ pub mod iter { } } - /// The item produced by the iterator - #[derive(Clone, PartialEq, Debug)] - pub enum Item { - /// A tracked file was modified, and index-specific information is passed. - Modification { - /// The entry with modifications. - entry: gix_index::Entry, - /// The index of the `entry` for lookup in [`gix_index::State::entries()`] - useful to look at neighbors. - entry_index: usize, - /// The repository-relative path of the entry. - rela_path: BString, - /// The computed status of the entry. - status: gix_status::index_as_worktree::EntryStatus<(), SubmoduleStatus>, - }, - /// An entry returned by the directory walk, without any relation to the index. - /// - /// This can happen if ignored files are returned as well, or if rename-tracking is disabled. - DirectoryContents { - /// The entry found during the disk traversal. 
- entry: gix_dir::Entry, - /// `collapsed_directory_status` is `Some(dir_status)` if this `entry` was part of a directory with the given - /// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was - /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). - /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). - collapsed_directory_status: Option, - }, - /// The rewrite tracking discovered a match between a deleted and added file, and considers them equal enough, - /// depending on the tracker settings. - /// - /// Note that the source of the rewrite is always the index as it detects the absence of entries, something that - /// can't be done during a directory walk. - Rewrite { - /// The source of the rewrite operation. - source: RewriteSource, - /// The untracked entry found during the disk traversal, the destination of the rewrite. - /// - /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the destination of the rewrite, and the current - /// location of the entry. - dirwalk_entry: gix_dir::Entry, - /// `collapsed_directory_status` is `Some(dir_status)` if this `dirwalk_entry` was part of a directory with the given - /// `dir_status` that wasn't the same as the one of `dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was - /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). - /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). - dirwalk_entry_collapsed_directory_status: Option, - /// The object id after the rename, specifically hashed in order to determine equality. 
- dirwalk_entry_id: gix_hash::ObjectId, - /// It's `None` if the 'source.id' is equal to `dirwalk_entry_id`, as identity made an actual diff computation unnecessary. - /// Otherwise, and if enabled, it's `Some(stats)` to indicate how similar both entries were. - diff: Option, - /// If true, this rewrite is created by copy, and 'source.id' is pointing to its source. - /// Otherwise, it's a rename, and 'source.id' points to a deleted object, - /// as renames are tracked as deletions and additions of the same or similar content. - copy: bool, - }, - } - impl Item { /// Return a simplified summary of the item as digest of its status, or `None` if this item is /// created from the directory walk and is *not untracked*, or if it is merely to communicate @@ -591,24 +538,6 @@ pub mod iter { type SubmoduleStatus = crate::submodule::Status; - /// The error returned by [Platform::into_index_worktree_iter()](crate::status::Platform::into_index_worktree_iter()). - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error(transparent)] - Index(#[from] crate::worktree::open_index::Error), - #[error("Failed to spawn producer thread")] - #[cfg(feature = "parallel")] - SpawnThread(#[source] std::io::Error), - #[error(transparent)] - #[cfg(not(feature = "parallel"))] - IndexWorktreeStatus(#[from] crate::status::index_worktree::Error), - #[error(transparent)] - ConfigSkipHash(#[from] crate::config::boolean::Error), - #[error(transparent)] - PrepareSubmodules(#[from] crate::submodule::modules::Error), - } - /// Lifecycle impl Platform<'_, Progress> where @@ -620,105 +549,14 @@ pub mod iter { /// - Optional patterns to use to limit the paths to look at. If empty, all paths are considered. 
#[doc(alias = "diff_index_to_workdir", alias = "git2")] pub fn into_index_worktree_iter( - self, + mut self, patterns: impl IntoIterator, - ) -> Result { - let index = match self.index { - None => IndexPersistedOrInMemory::Persisted(self.repo.index_or_empty()?), - Some(index) => index, - }; - - let skip_hash = self - .repo - .config - .resolved - .boolean(crate::config::tree::Index::SKIP_HASH) - .map(|res| crate::config::tree::Index::SKIP_HASH.enrich_error(res)) - .transpose() - .with_lenient_default(self.repo.config.lenient_config)? - .unwrap_or_default(); - let should_interrupt = self.should_interrupt.clone().unwrap_or_default(); - let submodule = BuiltinSubmoduleStatus::new(self.repo.clone().into_sync(), self.submodules)?; - #[cfg(feature = "parallel")] - { - let (tx, rx) = std::sync::mpsc::channel(); - let mut collect = Collect { tx }; - let patterns: Vec<_> = patterns.into_iter().collect(); - let join = std::thread::Builder::new() - .name("gix::status::index_worktree::iter::producer".into()) - .spawn({ - let repo = self.repo.clone().into_sync(); - let options = self.index_worktree_options; - let should_interrupt = should_interrupt.clone(); - let mut progress = self.progress; - move || -> Result<_, crate::status::index_worktree::Error> { - let repo = repo.to_thread_local(); - let out = repo.index_worktree_status( - &index, - patterns, - &mut collect, - gix_status::index_as_worktree::traits::FastEq, - submodule, - &mut progress, - &should_interrupt, - options, - )?; - Ok(Outcome { - index_worktree: out, - index, - changes: None, - skip_hash, - }) - } - }) - .map_err(Error::SpawnThread)?; - - Ok(super::Iter { - rx_and_join: Some((rx, join)), - should_interrupt, - changes: Vec::new(), - out: None, - }) - } - #[cfg(not(feature = "parallel"))] - { - let mut collect = Collect { items: Vec::new() }; - - let repo = self.repo.clone().into_sync(); - let options = self.index_worktree_options; - let mut progress = self.progress; - let repo = repo.to_thread_local(); - let 
out = repo.index_worktree_status( - &index, - patterns, - &mut collect, - gix_status::index_as_worktree::traits::FastEq, - submodule, - &mut progress, - &should_interrupt, - options, - )?; - let mut out = Outcome { - index_worktree: out, - index, - changes: None, - skip_hash, - }; - let mut iter = super::Iter { - items: Vec::new().into_iter(), - changes: Vec::new(), - out: None, - }; - let items = collect - .items - .into_iter() - .filter_map(|item| iter.maybe_keep_index_change(item)) - .collect::>(); - out.changes = (!iter.changes.is_empty()).then(|| std::mem::take(&mut iter.changes)); - iter.items = items.into_iter(); - iter.out = Some(out); - Ok(iter) - } + ) -> Result { + // deactivate the tree-iteration + self.head_tree = None; + Ok(index_worktree::Iter { + inner: self.into_iter(patterns)?, + }) } } @@ -726,107 +564,32 @@ pub mod iter { type Item = Result; fn next(&mut self) -> Option { - #[cfg(feature = "parallel")] - loop { - let (rx, _join) = self.rx_and_join.as_ref()?; - match rx.recv().ok() { - Some(item) => { - if let Some(item) = self.maybe_keep_index_change(item) { - break Some(Ok(item)); - } - continue; - } - None => { - let (_rx, handle) = self.rx_and_join.take()?; - break match handle.join().expect("no panic") { - Ok(mut out) => { - out.changes = Some(std::mem::take(&mut self.changes)); - self.out = Some(out); - None - } - Err(err) => Some(Err(err)), - }; + self.inner.next().map(|res| { + res.map(|item| match item { + crate::status::Item::IndexWorktree(item) => item, + crate::status::Item::TreeIndex(_) => unreachable!("BUG: we deactivated this kind of traversal"), + }) + .map_err(|err| match err { + crate::status::iter::Error::IndexWorktree(err) => err, + crate::status::iter::Error::TreeIndex(_) => { + unreachable!("BUG: we deactivated this kind of traversal") } - } - } - #[cfg(not(feature = "parallel"))] - self.items.next().map(Ok) + }) + }) } } /// Access impl super::Iter { /// Return the outcome of the iteration, or `None` if the iterator isn't 
fully consumed. - pub fn outcome_mut(&mut self) -> Option<&mut Outcome> { - self.out.as_mut() + pub fn outcome_mut(&mut self) -> Option<&mut crate::status::Outcome> { + self.inner.out.as_mut() } /// Turn the iterator into the iteration outcome, which is `None` on error or if the iteration /// isn't complete. - pub fn into_outcome(mut self) -> Option { - self.out.take() - } - } - - impl super::Iter { - fn maybe_keep_index_change(&mut self, item: Item) -> Option { - let change = match item { - Item::Modification { - status: gix_status::index_as_worktree::EntryStatus::NeedsUpdate(stat), - entry_index, - .. - } => (entry_index, ApplyChange::NewStat(stat)), - Item::Modification { - status: - gix_status::index_as_worktree::EntryStatus::Change( - gix_status::index_as_worktree::Change::Modification { - set_entry_stat_size_zero, - .. - }, - ), - entry_index, - .. - } if set_entry_stat_size_zero => (entry_index, ApplyChange::SetSizeToZero), - _ => return Some(item), - }; - - self.changes.push(change); - None - } - } - - #[cfg(feature = "parallel")] - impl Drop for super::Iter { - fn drop(&mut self) { - crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt); - } - } - - struct Collect { - #[cfg(feature = "parallel")] - tx: std::sync::mpsc::Sender, - #[cfg(not(feature = "parallel"))] - items: Vec, - } - - impl<'index> gix_status::index_as_worktree_with_renames::VisitEntry<'index> for Collect { - type ContentChange = ::Output; - type SubmoduleStatus = - ::Output; - - fn visit_entry( - &mut self, - entry: gix_status::index_as_worktree_with_renames::Entry< - 'index, - Self::ContentChange, - Self::SubmoduleStatus, - >, - ) { - // NOTE: we assume that the receiver triggers interruption so the operation will stop if the receiver is down. 
- #[cfg(feature = "parallel")] - self.tx.send(entry.into()).ok(); - #[cfg(not(feature = "parallel"))] - self.items.push(entry.into()); + pub fn into_outcome(mut self) -> Option { + self.inner.out.take() } } } diff --git a/gix/src/status/iter/mod.rs b/gix/src/status/iter/mod.rs new file mode 100644 index 00000000000..6df82c5f8cd --- /dev/null +++ b/gix/src/status/iter/mod.rs @@ -0,0 +1,330 @@ +use crate::bstr::BString; +use crate::config::cache::util::ApplyLeniencyDefault; +use crate::status::index_worktree::BuiltinSubmoduleStatus; +use crate::status::{index_worktree, tree_index, Platform}; +use crate::worktree::IndexPersistedOrInMemory; +use gix_status::index_as_worktree::{Change, EntryStatus}; +use std::sync::atomic::Ordering; + +pub(super) mod types; +use types::{ApplyChange, Item, Iter, Outcome}; + +/// Lifecycle +impl Platform<'_, Progress> +where + Progress: gix_features::progress::Progress, +{ + /// Turn the platform into an iterator for changes between the head-tree and the index, and the index and the working tree, + /// while optionally listing untracked and/or ignored files. + /// + /// * `patterns` + /// - Optional patterns to use to limit the paths to look at. If empty, all paths are considered. + #[doc(alias = "diff_index_to_workdir", alias = "git2")] + pub fn into_iter( + self, + patterns: impl IntoIterator, + ) -> Result { + let index = match self.index { + None => IndexPersistedOrInMemory::Persisted(self.repo.index_or_empty()?), + Some(index) => index, + }; + + let obtain_tree_id = || -> Result, crate::status::into_iter::Error> { + Ok(match self.head_tree { + Some(None) => Some(self.repo.head_tree_id()?.into()), + Some(Some(tree_id)) => Some(tree_id), + None => None, + }) + }; + + let skip_hash = self + .repo + .config + .resolved + .boolean(crate::config::tree::Index::SKIP_HASH) + .map(|res| crate::config::tree::Index::SKIP_HASH.enrich_error(res)) + .transpose() + .with_lenient_default(self.repo.config.lenient_config)? 
+ .unwrap_or_default(); + let should_interrupt = self.should_interrupt.clone().unwrap_or_default(); + let submodule = BuiltinSubmoduleStatus::new(self.repo.clone().into_sync(), self.submodules)?; + #[cfg(feature = "parallel")] + { + let (tx, rx) = std::sync::mpsc::channel(); + let patterns: Vec<_> = patterns.into_iter().collect(); + let join_tree_index = if let Some(tree_id) = obtain_tree_id()? { + std::thread::Builder::new() + .name("gix::status::tree_index::producer".into()) + .spawn({ + let repo = self.repo.clone().into_sync(); + let should_interrupt = should_interrupt.clone(); + let tx = tx.clone(); + let tree_index_renames = self.tree_index_renames; + let index = index.clone(); + let crate::Pathspec { repo: _, stack, search } = self + .repo + .index_worktree_status_pathspec::( + &patterns, + &index, + self.index_worktree_options.dirwalk_options.as_ref(), + )?; + move || -> Result<_, _> { + let repo = repo.to_thread_local(); + let mut pathspec = crate::Pathspec { + repo: &repo, + stack, + search, + }; + repo.tree_index_status( + &tree_id, + &index, + Some(&mut pathspec), + tree_index_renames, + |change, _, _| { + let action = if tx.send(change.into_owned().into()).is_err() + || should_interrupt.load(Ordering::Acquire) + { + gix_diff::index::Action::Cancel + } else { + gix_diff::index::Action::Continue + }; + Ok::<_, std::convert::Infallible>(action) + }, + ) + } + }) + .map_err(crate::status::into_iter::Error::SpawnThread)? 
+ .into() + } else { + None + }; + let mut collect = Collect { tx }; + let join_index_worktree = std::thread::Builder::new() + .name("gix::status::index_worktree::producer".into()) + .spawn({ + let repo = self.repo.clone().into_sync(); + let options = self.index_worktree_options; + let should_interrupt = should_interrupt.clone(); + let mut progress = self.progress; + move || -> Result<_, index_worktree::Error> { + let repo = repo.to_thread_local(); + let out = repo.index_worktree_status( + &index, + patterns, + &mut collect, + gix_status::index_as_worktree::traits::FastEq, + submodule, + &mut progress, + &should_interrupt, + options, + )?; + Ok(Outcome { + index_worktree: out, + tree_index: None, + worktree_index: index, + changes: None, + skip_hash, + }) + } + }) + .map_err(crate::status::into_iter::Error::SpawnThread)?; + + Ok(Iter { + rx_and_join: Some((rx, join_index_worktree, join_tree_index)), + should_interrupt, + index_changes: Vec::new(), + out: None, + }) + } + #[cfg(not(feature = "parallel"))] + { + let mut collect = Collect { items: Vec::new() }; + + let repo = self.repo; + let options = self.index_worktree_options; + let mut progress = self.progress; + let patterns: Vec = patterns.into_iter().collect(); + let (mut items, tree_index) = match obtain_tree_id()? 
{ + Some(tree_id) => { + let mut pathspec = repo.index_worktree_status_pathspec::( + &patterns, + &index, + self.index_worktree_options.dirwalk_options.as_ref(), + )?; + let mut items = Vec::new(); + let tree_index = self.repo.tree_index_status( + &tree_id, + &index, + Some(&mut pathspec), + self.tree_index_renames, + |change, _, _| { + items.push(change.into_owned().into()); + let action = if should_interrupt.load(Ordering::Acquire) { + gix_diff::index::Action::Cancel + } else { + gix_diff::index::Action::Continue + }; + Ok::<_, std::convert::Infallible>(action) + }, + )?; + (items, Some(tree_index)) + } + None => (Vec::new(), None), + }; + let out = repo.index_worktree_status( + &index, + patterns, + &mut collect, + gix_status::index_as_worktree::traits::FastEq, + submodule, + &mut progress, + &should_interrupt, + options, + )?; + let mut iter = Iter { + items: Vec::new().into_iter(), + index_changes: Vec::new(), + out: None, + }; + let mut out = Outcome { + index_worktree: out, + worktree_index: index, + tree_index, + changes: None, + skip_hash, + }; + items.extend( + collect + .items + .into_iter() + .filter_map(|item| iter.maybe_keep_index_change(item)), + ); + out.changes = (!iter.index_changes.is_empty()).then(|| std::mem::take(&mut iter.index_changes)); + iter.items = items.into_iter(); + iter.out = Some(out); + Ok(iter) + } + } +} + +/// The error returned for each item returned by [`Iter`]. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + IndexWorktree(#[from] index_worktree::Error), + #[error(transparent)] + TreeIndex(#[from] tree_index::Error), +} + +impl Iterator for Iter { + type Item = Result; + + fn next(&mut self) -> Option { + #[cfg(feature = "parallel")] + loop { + let (rx, _join_worktree, _join_tree) = self.rx_and_join.as_ref()?; + match rx.recv().ok() { + Some(item) => { + if let Some(item) = self.maybe_keep_index_change(item) { + break Some(Ok(item)); + } + continue; + } + None => { + let (_rx, worktree_handle, tree_handle) = self.rx_and_join.take()?; + let tree_index = if let Some(handle) = tree_handle { + match handle.join().expect("no panic") { + Ok(out) => Some(out), + Err(err) => break Some(Err(err.into())), + } + } else { + None + }; + break match worktree_handle.join().expect("no panic") { + Ok(mut out) => { + out.changes = Some(std::mem::take(&mut self.index_changes)); + out.tree_index = tree_index; + self.out = Some(out); + None + } + Err(err) => Some(Err(err.into())), + }; + } + } + } + #[cfg(not(feature = "parallel"))] + self.items.next().map(Ok) + } +} + +/// Access +impl Iter { + /// Return the outcome of the iteration, or `None` if the iterator isn't fully consumed. + pub fn outcome_mut(&mut self) -> Option<&mut Outcome> { + self.out.as_mut() + } + + /// Turn the iterator into the iteration outcome, which is `None` on error or if the iteration + /// isn't complete. + pub fn into_outcome(mut self) -> Option { + self.out.take() + } +} + +impl Iter { + fn maybe_keep_index_change(&mut self, item: Item) -> Option { + let change = match item { + Item::IndexWorktree(index_worktree::Item::Modification { + status: EntryStatus::NeedsUpdate(stat), + entry_index, + .. + }) => (entry_index, ApplyChange::NewStat(stat)), + Item::IndexWorktree(index_worktree::Item::Modification { + status: + EntryStatus::Change(Change::Modification { + set_entry_stat_size_zero, + .. 
+ }), + entry_index, + .. + }) if set_entry_stat_size_zero => (entry_index, ApplyChange::SetSizeToZero), + _ => return Some(item), + }; + + self.index_changes.push(change); + None + } +} + +#[cfg(feature = "parallel")] +impl Drop for Iter { + fn drop(&mut self) { + crate::util::parallel_iter_drop(self.rx_and_join.take(), &self.should_interrupt); + } +} + +struct Collect { + #[cfg(feature = "parallel")] + tx: std::sync::mpsc::Sender, + #[cfg(not(feature = "parallel"))] + items: Vec, +} + +impl<'index> gix_status::index_as_worktree_with_renames::VisitEntry<'index> for Collect { + type ContentChange = + ::Output; + type SubmoduleStatus = ::Output; + + fn visit_entry( + &mut self, + entry: gix_status::index_as_worktree_with_renames::Entry<'index, Self::ContentChange, Self::SubmoduleStatus>, + ) { + // NOTE: we assume that the receiver triggers interruption so the operation will stop if the receiver is down. + let item = Item::IndexWorktree(entry.into()); + #[cfg(feature = "parallel")] + self.tx.send(item).ok(); + #[cfg(not(feature = "parallel"))] + self.items.push(item); + } +} diff --git a/gix/src/status/iter/types.rs b/gix/src/status/iter/types.rs new file mode 100644 index 00000000000..6087ca25110 --- /dev/null +++ b/gix/src/status/iter/types.rs @@ -0,0 +1,139 @@ +use crate::bstr::BStr; +use crate::status::{index_worktree, tree_index}; +use crate::worktree::IndexPersistedOrInMemory; + +/// An iterator for changes between the index and the worktree and the head-tree and the index. +/// +/// Note that depending on the underlying configuration, there might be a significant delay until the first +/// item is received due to the buffering necessary to perform rename tracking and/or sorting. +/// +/// ### Submodules +/// +/// Note that submodules can be set to 'inactive', which will not exclude them from the status operation, similar to +/// how `git status` includes them. 
+/// +/// ### Index Changes +/// +/// Changes to the index are collected, and it's possible to write the index back using [Outcome::write_changes()]. +/// Note that these changes are not observable, they will always be kept. +/// +/// ### Parallel Operation +/// +/// Note that without the `parallel` feature, the iterator becomes 'serial', which means all status will be computed in advance, +/// and it's non-interruptible, yielding worse performance for is-dirty checks for instance as interruptions won't happen. +/// It's a crutch that is just there to make single-threaded applications possible at all, as it's not really an iterator +/// anymore. If this matters, better run [Repository::index_worktree_status()](crate::Repository::index_worktree_status) by hand +/// as it provides all control one would need, just not as an iterator. +/// +/// Also, even with `parallel` set, the first call to `next()` will block until there is an item available, without a chance +/// to interrupt unless [`status::Platform::should_interrupt_*()`](crate::status::Platform::should_interrupt_shared()) was +/// configured. +pub struct Iter { + #[cfg(feature = "parallel")] + #[allow(clippy::type_complexity)] + pub(super) rx_and_join: Option<( + std::sync::mpsc::Receiver, + std::thread::JoinHandle>, + Option>>, + )>, + #[cfg(feature = "parallel")] + pub(super) should_interrupt: crate::status::OwnedOrStaticAtomicBool, + /// Without parallelization, the iterator has to buffer all changes in advance. + #[cfg(not(feature = "parallel"))] + pub(super) items: std::vec::IntoIter, + /// The outcome of the operation, only available once the operation has ended. + pub(in crate::status) out: Option, + /// The set of `(entry_index, change)` we extracted in order to potentially write back the worktree index with the changes applied. + pub(super) index_changes: Vec<(usize, ApplyChange)>, +} + +/// The item produced by the [iterator](Iter). 
+#[derive(Clone, PartialEq, Debug)] +pub enum Item { + /// A change between the index and the worktree. + /// + /// Note that untracked changes are also collected here. + IndexWorktree(index_worktree::Item), + /// A change between the tree of `HEAD` and the index. + TreeIndex(gix_diff::index::Change), +} + +/// The data the thread sends over to the receiving iterator. +pub struct Outcome { + /// The outcome of the index-to-worktree comparison operation. + pub index_worktree: gix_status::index_as_worktree_with_renames::Outcome, + /// The outcome of the diff between `HEAD^{tree}` and the index, or `None` if this outcome + /// was produced with the [`into_index_worktree_iter()`](crate::status::Platform::into_index_worktree_iter()). + pub tree_index: Option, + /// The worktree index that was used for the operation. + pub worktree_index: IndexPersistedOrInMemory, + pub(super) skip_hash: bool, + pub(super) changes: Option>, +} + +impl Outcome { + /// Returns `true` if the index has received currently unapplied changes that *should* be written back. + /// + /// If they are not written back, subsequent `status` operations will take longer to complete, whereas the + /// additional work can be prevented by writing the changes back to the index. + pub fn has_changes(&self) -> bool { + self.changes.as_ref().map_or(false, |changes| !changes.is_empty()) + } + + /// Write the changes if there are any back to the index file. + /// This can only be done once as the changes are consumed in the process, if there were any. 
+ pub fn write_changes(&mut self) -> Option> { + let _span = gix_features::trace::coarse!("gix::status::index_worktree::Outcome::write_changes()"); + let changes = self.changes.take()?; + let mut index = match &self.worktree_index { + IndexPersistedOrInMemory::Persisted(persisted) => (***persisted).clone(), + IndexPersistedOrInMemory::InMemory(index) => index.clone(), + }; + + let entries = index.entries_mut(); + for (entry_index, change) in changes { + let entry = &mut entries[entry_index]; + match change { + ApplyChange::SetSizeToZero => { + entry.stat.size = 0; + } + ApplyChange::NewStat(new_stat) => { + entry.stat = new_stat; + } + } + } + + Some(index.write(crate::index::write::Options { + extensions: Default::default(), + skip_hash: self.skip_hash, + })) + } +} + +pub(super) enum ApplyChange { + SetSizeToZero, + NewStat(crate::index::entry::Stat), +} + +impl From for Item { + fn from(value: index_worktree::Item) -> Self { + Item::IndexWorktree(value) + } +} + +impl From for Item { + fn from(value: gix_diff::index::Change) -> Self { + Item::TreeIndex(value) + } +} + +/// Access +impl Item { + /// Return the relative path at which the item can currently be found in the working tree or index. 
+ pub fn location(&self) -> &BStr { + match self { + Item::IndexWorktree(change) => change.rela_path(), + Item::TreeIndex(change) => change.fields().0, + } + } +} diff --git a/gix/src/status/mod.rs b/gix/src/status/mod.rs index 92656fcfd3e..5e6bfdc3777 100644 --- a/gix/src/status/mod.rs +++ b/gix/src/status/mod.rs @@ -11,8 +11,10 @@ where repo: &'repo Repository, progress: Progress, index: Option, + head_tree: Option>, submodules: Submodule, index_worktree_options: index_worktree::Options, + tree_index_renames: tree_index::TrackRenames, should_interrupt: Option, } @@ -104,6 +106,8 @@ impl Repository { index: None, submodules: Submodule::default(), should_interrupt: None, + head_tree: Some(None), + tree_index_renames: Default::default(), index_worktree_options: index_worktree::Options { sorting: None, dirwalk_options: Some(self.dirwalk_options()?), @@ -130,6 +134,7 @@ impl Repository { /// pub mod is_dirty { use crate::Repository; + use std::convert::Infallible; /// The error returned by [Repository::is_dirty()]. #[derive(Debug, thiserror::Error)] @@ -138,7 +143,13 @@ pub mod is_dirty { #[error(transparent)] StatusPlatform(#[from] crate::status::Error), #[error(transparent)] - CreateStatusIterator(#[from] crate::status::index_worktree::iter::Error), + CreateStatusIterator(#[from] crate::status::into_iter::Error), + #[error(transparent)] + TreeIndexStatus(#[from] crate::status::tree_index::Error), + #[error(transparent)] + HeadTreeId(#[from] crate::reference::head_tree_id::Error), + #[error(transparent)] + OpenWorktreeIndex(#[from] crate::worktree::open_index::Error), } impl Repository { @@ -150,12 +161,29 @@ pub mod is_dirty { /// * submodules are taken in consideration, along with their `ignore` and `isActive` configuration /// /// Note that *untracked files* do *not* affect this flag. - /// - /// ### Incomplete Implementation Warning - /// - /// Currently, this does not compute changes between the head and the index. 
- // TODO: use iterator which also tests for head->index changes. + // TODO(performance): this could be its very own implementation with parallelism and the special: + // stop once there is a change flag, but without using the iterator for + // optimal resource usage. pub fn is_dirty(&self) -> Result { + { + let head_tree_id = self.head_tree_id()?; + let mut index_is_dirty = false; + + // Run this first as there is a high likelihood to find something, and it's very fast. + self.tree_index_status( + &head_tree_id, + &*self.index_or_empty()?, + None, + crate::status::tree_index::TrackRenames::Disabled, + |_, _, _| { + index_is_dirty = true; + Ok::<_, Infallible>(gix_diff::index::Action::Cancel) + }, + )?; + if index_is_dirty { + return Ok(true); + } + } let is_dirty = self .status(gix_features::progress::Discard)? .index_worktree_rewrites(None) @@ -172,7 +200,45 @@ pub mod is_dirty { } } +/// +pub mod into_iter { + /// The error returned by [status::Platform::into_iter()](crate::status::Platform::into_iter()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Index(#[from] crate::worktree::open_index::Error), + #[error("Failed to spawn producer thread")] + #[cfg(feature = "parallel")] + SpawnThread(#[source] std::io::Error), + #[error(transparent)] + #[cfg(not(feature = "parallel"))] + IndexWorktreeStatus(#[from] crate::status::index_worktree::Error), + #[error(transparent)] + ConfigSkipHash(#[from] crate::config::boolean::Error), + #[error(transparent)] + PrepareSubmodules(#[from] crate::submodule::modules::Error), + #[error("Could not create an index for the head tree to compare with the worktree index")] + HeadTreeIndex(#[from] crate::repository::index_from_tree::Error), + #[error("Could not obtain the tree id pointed to by `HEAD`")] + HeadTreeId(#[from] crate::reference::head_tree_id::Error), + #[error(transparent)] + AttributesAndExcludes(#[from] crate::repository::attributes::Error), + #[error(transparent)] + Pathspec(#[from] crate::pathspec::init::Error), + #[error(transparent)] + HeadTreeDiff(#[from] crate::status::tree_index::Error), + } +} + mod platform; /// pub mod index_worktree; + +/// +pub mod tree_index; + +/// +pub mod iter; +pub use iter::types::{Item, Iter, Outcome}; diff --git a/gix/src/status/platform.rs b/gix/src/status/platform.rs index 43c5fbdeabf..b9086152c46 100644 --- a/gix/src/status/platform.rs +++ b/gix/src/status/platform.rs @@ -1,4 +1,4 @@ -use crate::status::{index_worktree, OwnedOrStaticAtomicBool, Platform, Submodule, UntrackedFiles}; +use crate::status::{index_worktree, tree_index, OwnedOrStaticAtomicBool, Platform, Submodule, UntrackedFiles}; use std::sync::atomic::AtomicBool; /// Builder @@ -107,4 +107,20 @@ where cb(&mut self.index_worktree_options); self } + + /// Set the tree at which the `HEAD` ref ought to reside. + /// Setting this explicitly allows to compare the index to a tree that it possibly didn't originate from. 
+ /// + /// If not set explicitly, it will be read via `HEAD^{tree}`. + pub fn head_tree(mut self, tree: impl Into) -> Self { + self.head_tree = Some(Some(tree.into())); + self + } + + /// Configure how rename tracking should be performed when looking at changes between the [head tree](Self::head_tree()) + /// and the index. + pub fn tree_index_track_renames(mut self, renames: tree_index::TrackRenames) -> Self { + self.tree_index_renames = renames; + self + } } diff --git a/gix/src/status/tree_index.rs b/gix/src/status/tree_index.rs new file mode 100644 index 00000000000..06828dfa777 --- /dev/null +++ b/gix/src/status/tree_index.rs @@ -0,0 +1,140 @@ +use crate::config::tree; +use crate::Repository; + +/// The error returned by [Repository::tree_index_status()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + IndexFromMTree(#[from] crate::repository::index_from_tree::Error), + #[error(transparent)] + RewritesConfiguration(#[from] crate::diff::new_rewrites::Error), + #[error("Could not create diff-cache for similarity checks")] + DiffResourceCache(#[from] crate::repository::diff_resource_cache::Error), + #[error(transparent)] + TreeIndexDiff(#[from] gix_diff::index::Error), +} + +/// Specify how to perform rewrite tracking [Repository::tree_index_status()]. +#[derive(Default, Debug, Copy, Clone)] +pub enum TrackRenames { + /// Check `status.renames` and then `diff.renames` if the former isn't set. Otherwise, default to performing rewrites if nothing + /// is set. + #[default] + AsConfigured, + /// Track renames according to the given configuration. + Given(gix_diff::Rewrites), + /// Do not track renames. + Disabled, +} + +/// The outcome of [Repository::tree_index_status()]. +#[derive(Clone)] +pub struct Outcome { + /// Additional information produced by the rename tracker. + /// + /// It may be `None` if rename tracking was disabled. 
+ pub rewrite: Option, + /// The index produced from the input `tree` for the purpose of diffing. + /// + /// At some point this might go away once it's possible to diff an index from a tree directly. + pub tree_index: gix_index::State, +} + +impl Repository { + /// Produce the `git status` portion that shows the difference between `tree_id` (usually `HEAD^{tree}`) and the `worktree_index` + /// (typically the current `.git/index`), and pass all changes to `cb(change, tree_index, worktree_index)` with + /// full access to both indices that contributed to the change. + /// + /// *(It's notable that internally, the `tree_id` is converted into an index before diffing these)*. + /// Set `pathspec` to `Some(_)` to further reduce the set of files to check. + /// + /// ### Notes + /// + /// * This is a low-level method - prefer the [`Repository::status()`] platform instead for access to various iterators + /// over the same information. + pub fn tree_index_status<'repo, E>( + &'repo self, + tree_id: &gix_hash::oid, + worktree_index: &gix_index::State, + pathspec: Option<&mut crate::Pathspec<'repo>>, + renames: TrackRenames, + mut cb: impl FnMut( + gix_diff::index::ChangeRef<'_, '_>, + &gix_index::State, + &gix_index::State, + ) -> Result, + ) -> Result + where + E: Into>, + { + let _span = gix_trace::coarse!("gix::tree_index_status"); + let tree_index: gix_index::State = self.index_from_tree(tree_id)?.into(); + let rewrites = match renames { + TrackRenames::AsConfigured => { + let (mut rewrites, mut is_configured) = crate::diff::utils::new_rewrites_inner( + &self.config.resolved, + self.config.lenient_config, + &tree::Status::RENAMES, + &tree::Status::RENAME_LIMIT, + )?; + if !is_configured { + (rewrites, is_configured) = + crate::diff::utils::new_rewrites(&self.config.resolved, self.config.lenient_config)?; + } + if !is_configured { + rewrites = Some(Default::default()); + } + rewrites + } + TrackRenames::Given(rewrites) => Some(rewrites), + TrackRenames::Disabled => 
None, + }; + let mut resource_cache = None; + if rewrites.is_some() { + resource_cache = Some(self.diff_resource_cache_for_tree_diff()?); + } + let mut pathspec_storage = None; + if pathspec.is_none() { + pathspec_storage = self + .pathspec( + true, + None::<&str>, + false, + &gix_index::State::new(self.object_hash()), + gix_worktree::stack::state::attributes::Source::IdMapping, + ) + .expect("Impossible for this to fail without patterns") + .into(); + } + + let pathspec = + pathspec.unwrap_or_else(|| pathspec_storage.as_mut().expect("set if pathspec isn't set by user")); + let rewrite = gix_diff::index( + &tree_index, + worktree_index, + |change| cb(change, &tree_index, worktree_index), + rewrites + .zip(resource_cache.as_mut()) + .map(|(rewrites, resource_cache)| gix_diff::index::RewriteOptions { + resource_cache, + find: self, + rewrites, + }), + &mut pathspec.search, + &mut |relative_path, case, is_dir, out| { + let stack = pathspec.stack.as_mut().expect("initialized in advance"); + stack + .set_case(case) + .at_entry( + relative_path, + Some(crate::pathspec::is_dir_to_mode(is_dir)), + &pathspec.repo.objects, + ) + .map_or(false, |platform| platform.matching_attributes(out)) + }, + )?; + + Ok(Outcome { rewrite, tree_index }) + } +} diff --git a/gix/src/submodule/mod.rs b/gix/src/submodule/mod.rs index 4e595887bb2..dc80da3531d 100644 --- a/gix/src/submodule/mod.rs +++ b/gix/src/submodule/mod.rs @@ -299,9 +299,9 @@ pub mod status { #[error(transparent)] StatusPlatform(#[from] crate::status::Error), #[error(transparent)] - Status(#[from] crate::status::index_worktree::iter::Error), + StatusIter(#[from] crate::status::into_iter::Error), #[error(transparent)] - IndexWorktreeStatus(#[from] crate::status::index_worktree::Error), + NextStatusItem(#[from] crate::status::iter::Error), } impl Submodule<'_> { @@ -323,15 +323,15 @@ pub mod status { /// Return the status of the submodule, just like [`status`](Self::status), but allows to adjust options /// for more control 
over how the status is performed. /// + /// If `check_dirty` is `true`, the computation will stop once the first in a ladder of operations + /// ordered from cheap to expensive shows that the submodule is dirty. When checking for detailed + /// status information (i.e. untracked files, modifications, HEAD-index changes) only the first change + /// will be kept to stop as early as possible. + /// /// Use `&mut std::convert::identity` for `adjust_options` if no specific options are desired. /// A reason to change them might be to enable sorting to enjoy deterministic order of changes. /// /// The status allows to easily determine if a submodule [has changes](Status::is_dirty). - /// - /// ### Incomplete Implementation Warning - /// - /// Currently, changes between the head and the index aren't computed. - // TODO: Run the full status, including tree->index once available. #[doc(alias = "submodule_status", alias = "git2")] pub fn status_opts( &self, @@ -390,10 +390,13 @@ pub mod status { opts.dirwalk_options = None; } }) - .into_index_worktree_iter(Vec::new())?; + .into_iter(None)?; let mut changes = Vec::new(); for change in statuses { changes.push(change?); + if check_dirty { + break; + } } status.changes = Some(changes); Ok(status) @@ -444,7 +447,7 @@ pub mod status { /// /// `None` if the computation wasn't performed as the computation was skipped early, or if no working tree was /// available or repository was available. 
- pub changes: Option>, + pub changes: Option>, } } } diff --git a/gix/src/util.rs b/gix/src/util.rs index e9d1e10f8e9..789bcc9b1c7 100644 --- a/gix/src/util.rs +++ b/gix/src/util.rs @@ -53,11 +53,16 @@ impl From> for OwnedOrStaticAtomicBool { } } #[cfg(feature = "parallel")] -pub fn parallel_iter_drop( - mut rx_and_join: Option<(std::sync::mpsc::Receiver, std::thread::JoinHandle)>, +#[allow(clippy::type_complexity)] +pub fn parallel_iter_drop( + mut rx_and_join: Option<( + std::sync::mpsc::Receiver, + std::thread::JoinHandle, + Option>, + )>, should_interrupt: &OwnedOrStaticAtomicBool, ) { - let Some((rx, handle)) = rx_and_join.take() else { + let Some((rx, handle, maybe_handle)) = rx_and_join.take() else { return; }; let prev = should_interrupt.swap(true, std::sync::atomic::Ordering::Relaxed); @@ -66,11 +71,14 @@ pub fn parallel_iter_drop( OwnedOrStaticAtomicBool::Owned { flag, private: false } => flag.as_ref(), OwnedOrStaticAtomicBool::Owned { private: true, .. } => { // Leak the handle to let it shut down in the background, so drop returns more quickly. - drop((rx, handle)); + drop((rx, handle, maybe_handle)); return; } }; // Wait until there is time to respond before we undo the change. + if let Some(handle) = maybe_handle { + handle.join().ok(); + } handle.join().ok(); undo.fetch_update( std::sync::atomic::Ordering::SeqCst, diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index b56ccd3fbc1..39c2272b3d8 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -23,6 +23,7 @@ pub type Index = gix_fs::SharedFileSnapshot; /// A type to represent an index which either was loaded from disk as it was persisted there, or created on the fly in memory. #[cfg(feature = "index")] #[allow(clippy::large_enum_variant)] +#[derive(Clone)] pub enum IndexPersistedOrInMemory { /// The index as loaded from disk, and shared across clones of the owning `Repository`. 
Persisted(Index), diff --git a/gix/tests/fixtures/generated-archives/make_status_repos.tar b/gix/tests/fixtures/generated-archives/make_status_repos.tar index bfa898c90c1..2666626e624 100644 Binary files a/gix/tests/fixtures/generated-archives/make_status_repos.tar and b/gix/tests/fixtures/generated-archives/make_status_repos.tar differ diff --git a/gix/tests/fixtures/generated-archives/make_submodules.tar b/gix/tests/fixtures/generated-archives/make_submodules.tar index 2a38f8e7def..4a3566aabdf 100644 Binary files a/gix/tests/fixtures/generated-archives/make_submodules.tar and b/gix/tests/fixtures/generated-archives/make_submodules.tar differ diff --git a/gix/tests/fixtures/make_status_repos.sh b/gix/tests/fixtures/make_status_repos.sh index 63c1cfb1d48..3a4c3d535f7 100755 --- a/gix/tests/fixtures/make_status_repos.sh +++ b/gix/tests/fixtures/make_status_repos.sh @@ -13,3 +13,11 @@ git init -q untracked-only mkdir new touch new/untracked subdir/untracked ) + +git init git-mv +(cd git-mv + echo hi > file + git add file && git commit -m "init" + + git mv file renamed +) diff --git a/gix/tests/fixtures/make_submodules.sh b/gix/tests/fixtures/make_submodules.sh index be7fcd48887..906170711ed 100755 --- a/gix/tests/fixtures/make_submodules.sh +++ b/gix/tests/fixtures/make_submodules.sh @@ -21,6 +21,17 @@ git init submodule-head-changed cd m1 && git checkout @~1 ) +git init submodule-index-changed +(cd submodule-index-changed + git submodule add ../module1 m1 + git commit -m "add submodule" + + (cd m1 + git mv subdir subdir-renamed + git mv this that + ) +) + git init submodule-head-changed-no-worktree (cd submodule-head-changed-no-worktree git submodule add ../module1 m1 @@ -68,6 +79,12 @@ git init modified-untracked-and-submodule-head-changed-and-modified touch untracked ) +cp -Rv modified-untracked-and-submodule-head-changed-and-modified git-mv-and-untracked-and-submodule-head-changed-and-modified +(cd git-mv-and-untracked-and-submodule-head-changed-and-modified + 
git checkout this + git mv this that +) + git init with-submodules (cd with-submodules mkdir dir diff --git a/gix/tests/gix/status.rs b/gix/tests/gix/status.rs index dad2eb188e7..981bc34a392 100644 --- a/gix/tests/gix/status.rs +++ b/gix/tests/gix/status.rs @@ -16,16 +16,102 @@ pub fn repo(name: &str) -> crate::Result { )?) } +mod into_iter { + use crate::status::{repo, submodule_repo}; + use crate::util::hex_to_id; + use gix::status::tree_index::TrackRenames; + use gix::status::Item; + use gix_diff::Rewrites; + use gix_testtools::size_ok; + + #[test] + fn item_size() { + let actual = std::mem::size_of::(); + let expected = 264; + assert!( + size_ok(actual, expected), + "The size is the same as the one for the index-worktree-item: {actual} <~ {expected}" + ); + } + + #[test] + fn submodule_tree_index_modification() -> crate::Result { + let repo = submodule_repo("git-mv-and-untracked-and-submodule-head-changed-and-modified")?; + let mut status = repo + .status(gix::progress::Discard)? + .index_worktree_options_mut(|opts| { + opts.sorting = + Some(gix::status::plumbing::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); + }) + .tree_index_track_renames(TrackRenames::Given(Rewrites { + track_empty: true, + ..Default::default() + })) + .into_iter(None)?; + let mut items: Vec<_> = status.by_ref().filter_map(Result::ok).collect(); + items.sort_by(|a, b| a.location().cmp(b.location())); + assert_eq!(items.len(), 3, "1 untracked, 1 move, 1 submodule modification"); + insta::assert_debug_snapshot!(&items[1], @r#" + TreeIndex( + Rewrite { + source_location: "this", + source_index: 2, + source_entry_mode: Mode( + FILE, + ), + source_id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + location: "that", + index: 2, + entry_mode: Mode( + FILE, + ), + id: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391), + copy: false, + }, + ) + "#); + Ok(()) + } + + #[test] + fn error_during_tree_traversal_causes_failure() -> crate::Result { + let repo = repo("untracked-only")?; + 
let platform = repo.status(gix::progress::Discard)?.head_tree(hex_to_id( + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", /* empty blob, invalid tree*/ + )); + let expected_err = "Could not create index from tree at e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"; + if cfg!(feature = "parallel") { + let mut items: Vec<_> = platform.into_iter(None)?.collect(); + assert_eq!( + items.len(), + 3, + "2 untracked and one error, which is detected only in the end." + ); + assert_eq!(items.pop().expect("last item").unwrap_err().to_string(), expected_err); + } else { + match platform.into_iter(None) { + Ok(_) => { + unreachable!("errors would be detected early here as everything is done ahead of time") + } + Err(err) => { + assert_eq!(err.to_string(), expected_err); + } + } + } + Ok(()) + } +} + mod index_worktree { mod iter { use crate::status::{repo, submodule_repo}; - use gix::status::index_worktree::iter::Item; + use gix::status::index_worktree::Item; use gix_testtools::size_ok; use pretty_assertions::assert_eq; #[test] fn item_size() { - let actual = std::mem::size_of::(); + let actual = std::mem::size_of::(); let expected = 264; assert!( size_ok(actual, expected), @@ -42,7 +128,7 @@ mod index_worktree { opts.sorting = Some(gix::status::plumbing::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); }) - .into_index_worktree_iter(Vec::new())?; + .into_index_worktree_iter(None)?; let items: Vec<_> = status.by_ref().filter_map(Result::ok).collect(); assert_eq!(items.len(), 3, "1 untracked, 1 modified file, 1 submodule modification"); Ok(()) @@ -57,7 +143,7 @@ mod index_worktree { opts.sorting = Some(gix::status::plumbing::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); }) - .into_index_worktree_iter(Vec::new())?; + .into_index_worktree_iter(None)?; let items: Vec<_> = status.filter_map(Result::ok).collect(); assert_eq!( items, @@ -103,7 +189,7 @@ mod index_worktree { opts.sorting = 
Some(gix::status::plumbing::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); }) - .into_index_worktree_iter(Vec::new())?; + .into_index_worktree_iter(None)?; let items: Vec<_> = status.by_ref().filter_map(Result::ok).collect(); assert_eq!(items, [], "no untracked files are found…"); assert_eq!( @@ -124,7 +210,7 @@ mod index_worktree { opts.sorting = Some(gix::status::plumbing::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); }) - .into_index_worktree_iter(Vec::new())? + .into_index_worktree_iter(None)? .next() .is_some(); assert!(is_dirty, "this should abort the work as quickly as possible"); @@ -134,7 +220,7 @@ mod index_worktree { } mod is_dirty { - use crate::status::submodule_repo; + use crate::status::{repo, submodule_repo}; #[test] fn various_changes_positive() -> crate::Result { @@ -155,7 +241,7 @@ mod is_dirty { let repo = submodule_repo("module1")?; assert_eq!( repo.status(gix::progress::Discard)? - .into_index_worktree_iter(Vec::new())? + .into_index_worktree_iter(None)? 
.count(), 1, "there is one untracked file" @@ -168,9 +254,18 @@ mod is_dirty { } #[test] - fn no_changes() -> crate::Result { + fn index_changed() -> crate::Result { + let repo = repo("git-mv")?; + assert!( + repo.is_dirty()?, + "the only detectable change is in the index, in comparison to the HEAD^{{tree}}" + ); + let repo = submodule_repo("with-submodules")?; - assert!(!repo.is_dirty()?, "there are no changes"); + assert!( + repo.is_dirty()?, + "the index changed here as well, this time there is also a new file" + ); Ok(()) } } diff --git a/gix/tests/gix/submodule.rs b/gix/tests/gix/submodule.rs index 6f1105a5b37..cb86129c9bf 100644 --- a/gix/tests/gix/submodule.rs +++ b/gix/tests/gix/submodule.rs @@ -186,6 +186,37 @@ mod open { Ok(()) } + #[test] + fn modified_in_index_only() -> crate::Result { + let repo = repo("submodule-index-changed")?; + let sm = repo.submodules()?.into_iter().flatten().next().expect("one submodule"); + + for mode in [ + gix::submodule::config::Ignore::Untracked, + gix::submodule::config::Ignore::None, + ] { + for check_dirty in [false, true] { + let status = sm.status_opts(mode, check_dirty, &mut |platform| platform)?; + assert_eq!( + status.is_dirty(), + Some(true), + "two files were renamed using `git mv` for an HEAD^{{tree}}-index change" + ); + assert_eq!( + status.changes.expect("present").len(), + if check_dirty { 1 } else { 3 }, + "in is-dirty mode, we don't collect all changes" + ); + } + } + + assert!( + repo.is_dirty()?, + "superproject should see submodule changes in the index as well" + ); + Ok(()) + } + #[test] fn modified_and_untracked() -> crate::Result { let repo = repo("modified-and-untracked")?; @@ -194,7 +225,7 @@ mod open { let status = sm.status(gix::submodule::config::Ignore::Dirty, false)?; assert_eq!(status.is_dirty(), Some(false), "Dirty skips worktree changes entirely"); - let status = sm.status_opts( + let mut status = sm.status_opts( gix::submodule::config::Ignore::None, false, &mut |status: 
gix::status::Platform<'_, gix::progress::Discard>| { @@ -217,16 +248,18 @@ mod open { let status_with_dirty_check = sm.status_opts( gix::submodule::config::Ignore::None, - true, + true, /* check-dirty */ &mut |status: gix::status::Platform<'_, gix::progress::Discard>| { status.index_worktree_options_mut(|opts| { opts.sorting = Some(gix_status::index_as_worktree_with_renames::Sorting::ByPathCaseSensitive); }) }, )?; + status.changes.as_mut().expect("two changes").pop(); assert_eq!( status_with_dirty_check, status, - "it cannot abort early as the only change it sees is the modification check" + "it cannot abort early as the only change it sees is the modification check.\ + However, with check-dirty, it would only gather the changes" ); let status = sm.status(gix::submodule::config::Ignore::Untracked, false)?;