From bd5f44925306aa342b2b1c547779799b72372212 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 19 Feb 2024 15:53:16 +0100 Subject: [PATCH 1/7] feat!: Represent `DotGit` as `ExtendedKind` This cleans up the model despite also making it harder to detect whether something is a DotGit. --- gix-dir/src/entry.rs | 63 ++++++--- gix-dir/src/lib.rs | 12 +- gix-dir/src/walk/classify.rs | 83 +++++++---- gix-dir/src/walk/function.rs | 22 +-- gix-dir/src/walk/readdir.rs | 51 ++++--- gix-dir/tests/walk/mod.rs | 240 +++++++++++++++++++++++++++----- gix-dir/tests/walk_utils/mod.rs | 28 +++- 7 files changed, 369 insertions(+), 130 deletions(-) diff --git a/gix-dir/src/entry.rs b/gix-dir/src/entry.rs index 816acf8a424..dfd65ffc1f6 100644 --- a/gix-dir/src/entry.rs +++ b/gix-dir/src/entry.rs @@ -2,40 +2,45 @@ use crate::walk::ForDeletionMode; use crate::{Entry, EntryRef}; use std::borrow::Cow; -/// The kind of the entry. +/// A way of attaching additional information to an [Entry] . +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub enum Property { + /// The entry was named `.git`, matched according to the case-sensitivity rules of the repository. + DotGit, + /// The entry is a directory, and that directory is empty. + EmptyDirectory, + /// Always in conjunction with a directory on disk that is also known as cone-mode sparse-checkout exclude marker + /// - i.e. a directory that is excluded, so its whole content is excluded and not checked out nor is part of the index. + /// + /// Note that evne if the directory is empty, it will only have this state, not `EmptyDirectory`. + TrackedExcluded, +} + +/// The kind of the entry, seated in their kinds available on disk. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum Kind { /// The entry is a blob, executable or not. File, /// The entry is a symlink. Symlink, - /// A directory that contains no file or directory. - EmptyDirectory, /// The entry is an ordinary directory. /// /// Note that since we don't check for bare repositories, this could in fact be a collapsed /// bare repository. To be sure, check it again with [`gix_discover::is_git()`] and act accordingly. Directory, - /// The entry is a directory which *contains* a `.git` folder. + /// The entry is a directory which *contains* a `.git` folder, or a submodule entry in the index. Repository, } /// The kind of entry as obtained from a directory. -/// -/// The order of variants roughly relates from cheap-to-compute to most expensive, as each level needs more tests to assert. -/// Thus, `DotGit` is the cheapest, while `Untracked` is among the most expensive and one of the major outcomes of any -/// [`walk`](crate::walk()) run. -/// For example, if an entry was `Pruned`, we effectively don't know if it would have been `Untracked` as well as we stopped looking. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum Status { - /// The filename of an entry was `.git`, which is generally pruned. - DotGit, - /// The provided pathspec prevented further processing as the path didn't match. - /// If this happens, no further checks are done so we wouldn't know if the path is also ignored for example (by mention in `.gitignore`). + /// The entry was removed from the walk due to its other properties, like [Property] or [PathspecMatch] + /// + /// Note that entries flagged as `DotGit` directory will always be considered `Pruned`, but if they are + /// also ignored, in delete mode, they will be considered `Ignored` instead. This way, it's easier to remove them + /// while they will not be available for any interactions in read-only mode. Pruned, - /// Always in conjunction with a directory on disk that is also known as cone-mode sparse-checkout exclude marker - i.e. a directory - /// that is excluded, so its whole content is excluded and not checked out nor is part of the index. - TrackedExcluded, /// The entry is tracked in Git. Tracked, /// The entry is ignored as per `.gitignore` files and their rules. @@ -52,7 +57,7 @@ pub enum Status { #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] pub enum PathspecMatch { /// The match happened because there wasn't any pattern, which matches all, or because there was a nil pattern or one with an empty path. - /// Thus this is not a match by merit. + /// Thus, this is not a match by merit. Always, /// A match happened, but the pattern excludes everything it matches, which means this entry was excluded. Excluded, @@ -84,12 +89,24 @@ impl From for PathspecMatch { } } +impl From> for PathspecMatch { + fn from(m: gix_pathspec::search::Match<'_>) -> Self { + if m.is_excluded() { + PathspecMatch::Excluded + } else { + m.kind.into() + } + } +} + +/// Conversion impl EntryRef<'_> { /// Strip the lifetime to obtain a fully owned copy. pub fn to_owned(&self) -> Entry { Entry { rela_path: self.rela_path.clone().into_owned(), status: self.status, + property: self.property, disk_kind: self.disk_kind, index_kind: self.index_kind, pathspec_match: self.pathspec_match, @@ -101,6 +118,7 @@ impl EntryRef<'_> { Entry { rela_path: self.rela_path.into_owned(), status: self.status, + property: self.property, disk_kind: self.disk_kind, index_kind: self.index_kind, pathspec_match: self.pathspec_match, @@ -108,12 +126,14 @@ impl EntryRef<'_> { } } +/// Conversion impl Entry { /// Obtain an [`EntryRef`] from this instance. pub fn to_ref(&self) -> EntryRef<'_> { EntryRef { rela_path: Cow::Borrowed(self.rela_path.as_ref()), status: self.status, + property: self.property, disk_kind: self.disk_kind, index_kind: self.index_kind, pathspec_match: self.pathspec_match, @@ -136,10 +156,7 @@ impl From for Kind { impl Status { /// Return true if this status is considered pruned. A pruned entry is typically hidden from view due to a pathspec. pub fn is_pruned(&self) -> bool { - match self { - Status::DotGit | Status::TrackedExcluded | Status::Pruned => true, - Status::Ignored(_) | Status::Untracked | Status::Tracked => false, - } + matches!(&self, Status::Pruned) } /// Return `true` if `file_type` is a directory on disk and isn't ignored, and is not a repository. /// This implements the default rules of `git status`, which is good for a minimal traversal through @@ -158,7 +175,7 @@ impl Status { return false; } match self { - Status::DotGit | Status::TrackedExcluded | Status::Pruned => false, + Status::Pruned => false, Status::Ignored(_) => { for_deletion.map_or(false, |fd| { matches!( @@ -180,6 +197,6 @@ impl Kind { /// Return `true` if this is a directory on disk. Note that this is true for repositories as well. pub fn is_dir(&self) -> bool { - matches!(self, Kind::EmptyDirectory | Kind::Directory | Kind::Repository) + matches!(self, Kind::Directory | Kind::Repository) } } diff --git a/gix-dir/src/lib.rs b/gix-dir/src/lib.rs index 48fa269b085..fffe2d46eb4 100644 --- a/gix-dir/src/lib.rs +++ b/gix-dir/src/lib.rs @@ -29,14 +29,14 @@ pub struct EntryRef<'a> { /// Note that many entries with status `Pruned` will not show up as their kind hasn't yet been determined when they were /// pruned very early on. pub status: entry::Status, - /// Further specify the what the entry is on disk, similar to a file mode. - /// This is `None` if the entry was pruned by a pathspec that could not match, as we then won't invest the time to obtain - /// the kind of the entry on disk. + /// Additional properties of the entry. + pub property: Option, + /// Further specify what the entry is on disk, similar to a file mode. + /// This is `None` if we decided it's not worth it to exit early and avoid trying to obtain this information. pub disk_kind: Option, /// The kind of entry according to the index, if tracked. *Usually* the same as `disk_kind`. pub index_kind: Option, /// Determines how the pathspec matched. - /// Can also be `None` if no pathspec matched, or if the status check stopped prior to checking for pathspec matches which is the case for [`entry::Status::DotGit`]. /// Note that it can also be `Some(PathspecMatch::Excluded)` if a negative pathspec matched. pub pathspec_match: Option, } @@ -48,7 +48,9 @@ pub struct Entry { pub rela_path: BString, /// The status of entry, most closely related to what we know from `git status`, but not the same. pub status: entry::Status, - /// Further specify the what the entry is on disk, similar to a file mode. + /// Additional flags that further clarify properties of the entry. + pub property: Option, + /// Further specify what the entry is on disk, similar to a file mode. pub disk_kind: Option, /// The kind of entry according to the index, if tracked. *Usually* the same as `disk_kind`. pub index_kind: Option, diff --git a/gix-dir/src/walk/classify.rs b/gix-dir/src/walk/classify.rs index 1a0b19b0d9d..d23f60d0d9a 100644 --- a/gix-dir/src/walk/classify.rs +++ b/gix-dir/src/walk/classify.rs @@ -1,4 +1,5 @@ -use crate::{entry, Entry}; +use crate::{entry, Entry, EntryRef}; +use std::borrow::Cow; use crate::entry::PathspecMatch; use crate::walk::{Context, Error, ForDeletionMode, Options}; @@ -61,7 +62,10 @@ pub fn root( pub struct Outcome { /// The computed status of an entry. It can be seen as aggregate of things we know about an entry. pub status: entry::Status, - /// What the entry is on disk, or `None` if we aborted the classification early. + /// An additional property. + pub property: Option, + /// What the entry is on disk, or `None` if we aborted the classification early or an IO-error occurred + /// when querying the disk. /// /// Note that the index is used to avoid disk access provided its entries are marked uptodate /// (possibly by a prior call to update the status). @@ -89,6 +93,7 @@ impl From<&Entry> for Outcome { fn from(e: &Entry) -> Self { Outcome { status: e.status, + property: e.property, disk_kind: e.disk_kind, index_kind: e.index_kind, pathspec_match: e.pathspec_match, @@ -96,6 +101,19 @@ impl From<&Entry> for Outcome { } } +impl<'a> EntryRef<'a> { + pub(super) fn from_outcome(rela_path: Cow<'a, BStr>, info: crate::walk::classify::Outcome) -> Self { + EntryRef { + rela_path, + property: info.property, + status: info.status, + disk_kind: info.disk_kind, + index_kind: info.index_kind, + pathspec_match: info.pathspec_match, + } + } +} + /// Figure out what to do with `rela_path`, provided as worktree-relative path, with `disk_file_type` if it is known already /// as it helps to match pathspecs correctly, which can be different for directories. /// `path` is a disk-accessible variant of `rela_path` which is within the `worktree_root`, and will be modified temporarily but remain unchanged. @@ -123,12 +141,36 @@ pub fn path( ctx: &mut Context<'_>, ) -> Result { let mut out = Outcome { - status: entry::Status::DotGit, + status: entry::Status::Pruned, + property: None, disk_kind, index_kind: None, pathspec_match: None, }; if is_eq(rela_path[filename_start_idx..].as_bstr(), ".git", ignore_case) { + out.pathspec_match = ctx + .pathspec + .pattern_matching_relative_path( + rela_path.as_bstr(), + disk_kind.map(|ft| ft.is_dir()), + ctx.pathspec_attributes, + ) + .map(Into::into); + if for_deletion.is_some() { + if let Some(excluded) = ctx + .excludes + .as_mut() + .map_or(Ok(None), |stack| { + stack + .at_entry(rela_path.as_bstr(), disk_kind.map(|ft| ft.is_dir()), ctx.objects) + .map(|platform| platform.excluded_kind()) + }) + .map_err(Error::ExcludesAccess)? + { + out.status = entry::Status::Ignored(excluded); + } + } + out.property = entry::Property::DotGit.into(); return Ok(out); } let pathspec_could_match = rela_path.is_empty() @@ -139,31 +181,25 @@ pub fn path( return Ok(out.with_status(entry::Status::Pruned)); } - let (uptodate_index_kind, index_kind, mut maybe_status) = resolve_file_type_with_index( + let (uptodate_index_kind, index_kind, property) = resolve_file_type_with_index( rela_path, ctx.index, ctx.ignore_case_index_lookup.filter(|_| ignore_case), ); let mut kind = uptodate_index_kind.or(disk_kind).or_else(on_demand_disk_kind); - maybe_status = maybe_status - .or_else(|| (index_kind.map(|k| k.is_dir()) == kind.map(|k| k.is_dir())).then_some(entry::Status::Tracked)); + let maybe_status = if property.is_none() { + (index_kind.map(|k| k.is_dir()) == kind.map(|k| k.is_dir())).then_some(entry::Status::Tracked) + } else { + out.property = property; + Some(entry::Status::Pruned) + }; // We always check the pathspec to have the value filled in reliably. out.pathspec_match = ctx .pathspec - .pattern_matching_relative_path( - rela_path.as_bstr(), - disk_kind.map(|ft| ft.is_dir()), - ctx.pathspec_attributes, - ) - .map(|m| { - if m.is_excluded() { - PathspecMatch::Excluded - } else { - m.kind.into() - } - }); + .pattern_matching_relative_path(rela_path.as_bstr(), kind.map(|ft| ft.is_dir()), ctx.pathspec_attributes) + .map(Into::into); let mut maybe_upgrade_to_repository = |current_kind, find_harder: bool| { if recurse_repositories { @@ -258,8 +294,7 @@ pub fn path( /// Note that `rela_path` is used as buffer for convenience, but will be left as is when this function returns. /// Also note `maybe_file_type` will be `None` for entries that aren't up-to-date and files, for directories at least one entry must be uptodate. -/// Returns `(maybe_file_type, Option, Option(TrackedExcluded)`, with the last option being set only for sparse directories. -/// `tracked_exclued` indicates it's a sparse directory was found. +/// Returns `(maybe_file_type, Option, flags)`, with the last option being a flag set only for sparse directories in the index. /// `index_file_type` is the type of `rela_path` as available in the index. /// /// ### Shortcoming @@ -271,9 +306,9 @@ fn resolve_file_type_with_index( rela_path: &mut BString, index: &gix_index::State, ignore_case: Option<&gix_index::AccelerateLookup<'_>>, -) -> (Option, Option, Option) { +) -> (Option, Option, Option) { // TODO: either get this to work for icase as well, or remove the need for it. Logic is different in both branches. - let mut special_status = None; + let mut special_property = None; fn entry_to_kinds(entry: &gix_index::Entry) -> (Option, Option) { let kind = if entry.mode.is_submodule() { @@ -352,14 +387,14 @@ fn resolve_file_type_with_index( .filter(|_| kind.is_none()) .map_or(false, |idx| index.entries()[idx].mode.is_sparse()) { - special_status = Some(entry::Status::TrackedExcluded); + special_property = Some(entry::Property::TrackedExcluded); } (kind, is_tracked.then_some(entry::Kind::Directory)) } Some(entry) => entry_to_kinds(entry), } }; - (uptodate_kind, index_kind, special_status) + (uptodate_kind, index_kind, special_property) } fn is_eq(lhs: &BStr, rhs: impl AsRef, ignore_case: bool) -> bool { diff --git a/gix-dir/src/walk/function.rs b/gix-dir/src/walk/function.rs index 1017d1d3451..a7ec42f7a3d 100644 --- a/gix-dir/src/walk/function.rs +++ b/gix-dir/src/walk/function.rs @@ -146,14 +146,7 @@ pub(super) fn can_recurse( if info.disk_kind.map_or(true, |k| !k.is_dir()) { return false; } - let entry = EntryRef { - rela_path: Cow::Borrowed(rela_path), - status: info.status, - disk_kind: info.disk_kind, - index_kind: info.index_kind, - pathspec_match: info.pathspec_match, - }; - delegate.can_recurse(entry, for_deletion) + delegate.can_recurse(EntryRef::from_outcome(Cow::Borrowed(rela_path), info), for_deletion) } /// Possibly emit an entry to `for_each` in case the provided information makes that possible. @@ -174,7 +167,7 @@ pub(super) fn emit_entry( ) -> Action { out.seen_entries += 1; - if (!emit_empty_directories && info.disk_kind == Some(entry::Kind::EmptyDirectory) + if (!emit_empty_directories && info.property == Some(entry::Property::EmptyDirectory) || !emit_tracked && info.status == entry::Status::Tracked) || emit_ignored.is_none() && matches!(info.status, entry::Status::Ignored(_)) || !emit_pruned @@ -187,14 +180,5 @@ pub(super) fn emit_entry( } out.returned_entries += 1; - delegate.emit( - EntryRef { - rela_path, - status: info.status, - disk_kind: info.disk_kind, - index_kind: info.index_kind, - pathspec_match: info.pathspec_match, - }, - dir_status, - ) + delegate.emit(EntryRef::from_outcome(rela_path, info), dir_status) } diff --git a/gix-dir/src/walk/readdir.rs b/gix-dir/src/walk/readdir.rs index 43c1a283c3c..0d28036444d 100644 --- a/gix-dir/src/walk/readdir.rs +++ b/gix-dir/src/walk/readdir.rs @@ -6,7 +6,7 @@ use crate::entry::{PathspecMatch, Status}; use crate::walk::function::{can_recurse, emit_entry}; use crate::walk::EmissionMode::CollapseDirectory; use crate::walk::{classify, Action, CollapsedEntriesEmissionMode, Context, Delegate, Error, Options, Outcome}; -use crate::{entry, walk, Entry}; +use crate::{entry, walk, Entry, EntryRef}; /// ### Deviation /// @@ -115,13 +115,8 @@ impl State { /// Hold the entry with the given `status` if it's a candidate for collapsing the containing directory. fn held_for_directory_collapse(&mut self, rela_path: &BStr, info: classify::Outcome, opts: &Options) -> bool { if opts.should_hold(info.status) { - self.on_hold.push(Entry { - rela_path: rela_path.to_owned(), - status: info.status, - disk_kind: info.disk_kind, - index_kind: info.index_kind, - pathspec_match: info.pathspec_match, - }); + self.on_hold + .push(EntryRef::from_outcome(Cow::Borrowed(rela_path), info).into_owned()); true } else { false @@ -200,15 +195,19 @@ impl Mark { ) -> walk::Action { if num_entries == 0 { let empty_info = classify::Outcome { - disk_kind: if num_entries == 0 { + property: if num_entries == 0 { assert_ne!( dir_info.disk_kind, Some(entry::Kind::Repository), "BUG: it shouldn't be possible to classify an empty dir as repository" ); - Some(entry::Kind::EmptyDirectory) + if dir_info.property.is_none() { + entry::Property::EmptyDirectory.into() + } else { + dir_info.property + } } else { - dir_info.disk_kind + dir_info.property }, pathspec_match: ctx .pathspec @@ -217,13 +216,9 @@ impl Mark { ..dir_info }; if opts.should_hold(empty_info.status) { - state.on_hold.push(Entry { - rela_path: dir_rela_path.to_owned(), - status: empty_info.status, - disk_kind: empty_info.disk_kind, - index_kind: empty_info.index_kind, - pathspec_match: empty_info.pathspec_match, - }); + state + .on_hold + .push(EntryRef::from_outcome(Cow::Borrowed(dir_rela_path), empty_info).into_owned()); Action::Continue } else { emit_entry(Cow::Borrowed(dir_rela_path), empty_info, None, opts, out, delegate) @@ -285,7 +280,7 @@ impl Mark { } matching_entries += usize::from(pathspec_match.map_or(false, |m| !m.should_ignore())); match status { - Status::DotGit | Status::Pruned | Status::TrackedExcluded => { + Status::Pruned => { unreachable!("BUG: pruned aren't ever held, check `should_hold()`") } Status::Tracked => { /* causes the folder not to be collapsed */ } @@ -359,13 +354,17 @@ impl Mark { } out.seen_entries += removed_without_emitting as u32; - state.on_hold.push(Entry { - rela_path: dir_rela_path.to_owned(), - status: dir_status, - disk_kind: dir_info.disk_kind, - index_kind: dir_info.index_kind, - pathspec_match: dir_pathspec_match, - }); + state.on_hold.push( + EntryRef::from_outcome( + Cow::Borrowed(dir_rela_path), + classify::Outcome { + status: dir_status, + pathspec_match: dir_pathspec_match, + ..dir_info + }, + ) + .into_owned(), + ); Some(action) } } diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index dabb51c862a..91c5518ed03 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -9,6 +9,7 @@ use crate::walk_utils::{ use gix_dir::entry; use gix_dir::entry::Kind::*; use gix_dir::entry::PathspecMatch::*; +use gix_dir::entry::Property::*; use gix_dir::entry::Status::*; use gix_dir::walk::CollapsedEntriesEmissionMode::{All, OnStatusMismatch}; use gix_dir::walk::EmissionMode::*; @@ -81,7 +82,7 @@ fn empty_root() -> crate::Result { ); assert_eq!( entries, - [entry("", Untracked, EmptyDirectory)], + [entry("", Untracked, Directory).with_property(EmptyDirectory)], "this is how we can indicate the worktree is entirely untracked" ); Ok(()) @@ -103,10 +104,10 @@ fn complex_empty() -> crate::Result { entries, &[ entry("dirs-and-files/dir/file", Untracked, File), - entry("dirs-and-files/sub", Untracked, EmptyDirectory), - entry("empty-toplevel", Untracked, EmptyDirectory), - entry("only-dirs/other", Untracked, EmptyDirectory), - entry("only-dirs/sub/subsub", Untracked, EmptyDirectory), + entry("dirs-and-files/sub", Untracked, Directory).with_property(EmptyDirectory), + entry("empty-toplevel", Untracked, Directory).with_property(EmptyDirectory), + entry("only-dirs/other", Untracked, Directory).with_property(EmptyDirectory), + entry("only-dirs/sub/subsub", Untracked, Directory).with_property(EmptyDirectory), ], "we see each and every directory, and get it classified as empty as it's set to be emitted" ); @@ -160,7 +161,7 @@ fn complex_empty() -> crate::Result { entries, &[ entry("dirs-and-files", Untracked, Directory), - entry("empty-toplevel", Untracked, EmptyDirectory), + entry("empty-toplevel", Untracked, Directory).with_property(EmptyDirectory), entry("only-dirs", Untracked, Directory), ], "empty directories collapse just fine" @@ -242,6 +243,120 @@ fn ignored_with_prefix_pathspec_collapses_just_like_untracked() -> crate::Result Ok(()) } +#[test] +fn ignored_dir_with_cwd_handling() -> crate::Result { + let root = fixture("untracked-and-ignored-for-collapse"); + let ((out, _root), entries) = collect_filtered_with_cwd( + &root, + Some(&root.join("ignored")), + None, + |keep, ctx| { + walk( + &root, + ctx, + walk::Options { + for_deletion: Some(Default::default()), + emit_ignored: Some(CollapseDirectory), + ..options() + }, + keep, + ) + }, + None::<&str>, + ); + + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 2, + returned_entries: entries.len(), + seen_entries: 3, + } + ); + assert_eq!( + entries, + [entryps("ignored", Ignored(Expendable), Directory, Prefix)], + "even if the traversal root is for deletion, unless the CWD is set it will be collapsed (no special cases)" + ); + + let real_root = gix_path::realpath(&root)?; + let ((out, _root), entries) = collect_filtered_with_cwd( + &real_root, + Some(&real_root.join("ignored")), + Some("ignored"), + |keep, ctx| { + walk( + &real_root, + ctx, + walk::Options { + for_deletion: Some(Default::default()), + emit_ignored: Some(CollapseDirectory), + ..options() + }, + keep, + ) + }, + None::<&str>, + ); + + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 2, + returned_entries: entries.len(), + seen_entries: 2, + } + ); + assert_eq!( + entries, + [ + entryps("ignored/b", Ignored(Expendable), File, Prefix), + ], + "the traversal starts from the top, but we automatically prevent the 'd' directory from being deleted by stopping its collapse." + ); + + let real_root = gix_path::realpath(fixture("subdir-untracked-and-ignored"))?; + let ((out, _root), entries) = collect_filtered_with_cwd( + &real_root, + None, + Some("d/d/generated"), + |keep, ctx| { + walk( + &real_root, + ctx, + walk::Options { + for_deletion: Some(Default::default()), + emit_ignored: Some(CollapseDirectory), + emit_pruned: false, + ..options() + }, + keep, + ) + }, + Some(":/*generated/*"), + ); + + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 8, + returned_entries: entries.len(), + seen_entries: 26 + } + ); + assert_eq!( + entries, + [ + entryps("d/d/generated/b", Ignored(Expendable), File, WildcardMatch), + entryps("d/generated", Ignored(Expendable), Directory, WildcardMatch), + entryps("generated", Ignored(Expendable), Directory, WildcardMatch), + ], + "'d/d/generated/b' is there because the parent directory isn't allowed to fold due to the CWD rule." + ); + + Ok(()) +} + #[test] fn only_untracked_with_cwd_handling() -> crate::Result { let root = fixture("only-untracked"); @@ -316,7 +431,6 @@ fn only_untracked_with_cwd_handling() -> crate::Result { "even if the traversal root is for deletion, unless the CWD is set it will be collapsed (no special cases)" ); - // Needs real-root to assure let real_root = gix_path::realpath(&root)?; let ((out, _root), entries) = collect_filtered_with_cwd( &real_root, @@ -374,15 +488,15 @@ fn only_untracked_with_cwd_handling() -> crate::Result { keep, ) }, - Some("../d"), + Some("../d/"), ); assert_eq!( out, walk::Outcome { - read_dir_calls: 3, + read_dir_calls: 2, returned_entries: entries.len(), - seen_entries: 8, + seen_entries: 4, } ); assert_eq!( @@ -429,7 +543,7 @@ fn only_untracked_with_pathspec() -> crate::Result { ); assert_eq!( entries, - [entryps("d", Untracked, Directory, Prefix),], + [entryps("d", Untracked, Directory, Prefix)], "this is equivalent as if we use a prefix, as we end up starting the traversal from 'd'" ); @@ -720,7 +834,7 @@ fn expendable_and_precious() { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitignore", Tracked, File), entry("a.o", Ignored(Expendable), File), entry("all-expendable", Ignored(Expendable), Directory), @@ -969,7 +1083,7 @@ fn untracked_and_ignored_for_deletion_negative_wildcard_spec() -> crate::Result assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitignore", Untracked, File), entry("a.o", Ignored(Expendable), File), entry("b.o", Ignored(Expendable), File), @@ -1024,7 +1138,7 @@ fn untracked_and_ignored_for_deletion_positive_wildcard_spec() -> crate::Result assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).no_kind().with_property(DotGit), entry_nomatch(".gitignore", Pruned, File), entry_nomatch("a.o", Ignored(Expendable), File), entry_nomatch("b.o", Ignored(Expendable), File), @@ -1077,7 +1191,7 @@ fn untracked_and_ignored_for_deletion_nonmatching_wildcard_spec() -> crate::Resu assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).no_match().with_property(DotGit), entry_nomatch(".gitignore", Pruned, File), entry_nomatch("a.o", Ignored(Expendable), File), entry_nomatch("b.o", Ignored(Expendable), File), @@ -1149,7 +1263,7 @@ fn nested_ignored_dirs_for_deletion_nonmatching_wildcard_spec() -> crate::Result assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit), entry_nomatch(".gitignore", Pruned, File), entry_nomatch("bare/HEAD", Pruned, File), entry_nomatch("bare/info/exclude", Pruned, File), @@ -1192,7 +1306,7 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> crate::Result { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitignore", Tracked, File).with_index_kind(File), entry("ignored", Ignored(Expendable), Directory), entry("other", Ignored(Expendable), Directory), @@ -1234,8 +1348,8 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> crate::Result { assert_eq!( entries, [ - entry_nokind(".git", DotGit), - entry_nokind("ignored/d/.git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).no_match(), + entry_nokind("ignored/d/.git", Ignored(Expendable) ).with_property(DotGit).with_match(WildcardMatch), entryps("ignored/d/.gitignore", Ignored(Expendable), File, WildcardMatch), entryps("ignored/d/a.o", Ignored(Expendable), File, WildcardMatch), entryps( @@ -1253,7 +1367,59 @@ fn expendable_and_precious_in_ignored_dir_with_pathspec() -> crate::Result { It seems strange that 'precious' isn't precious, while 'all-precious' is. However, the ignore-search is special as it goes backward through directories (using directory-declarations), and aborts if it matched. Thus it finds that '$/all-precious/' matched, but in the other cases it maches 'ignored/'. - 'other' gets folded and inherits, just like before." + 'other' gets folded and inherits, just like before. + Also, look how the ignore-state overrides the prune-default for DotGit kinds, to have more finegrained classification." + ); + + let ((out, _root), entries) = collect_filtered( + &root, + None, + |keep, ctx| { + walk( + &root, + ctx, + walk::Options { + emit_ignored: Some(CollapseDirectory), + emit_untracked: CollapseDirectory, + emit_pruned: true, + for_deletion: None, + ..options() + }, + keep, + ) + }, + Some("*ignored*"), + ); + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 9, + returned_entries: entries.len(), + seen_entries: 19, + }, + ); + + assert_eq!( + entries, + [ + entry_nokind(".git", Pruned).with_property(DotGit).no_match(), + entry_nokind("ignored/d/.git", Pruned) + .with_property(DotGit) + .with_match(WildcardMatch), + entryps("ignored/d/.gitignore", Ignored(Expendable), File, WildcardMatch), + entryps("ignored/d/a.o", Ignored(Expendable), File, WildcardMatch), + entryps( + "ignored/d/all-expendable", + Ignored(Expendable), + Directory, + WildcardMatch + ), + entryps("ignored/d/all-precious", Ignored(Precious), Directory, WildcardMatch), + entryps("ignored/d/mixed", Ignored(Expendable), Directory, WildcardMatch), + entryps("ignored/d/precious", Ignored(Expendable), File, WildcardMatch), + entryps("other", Ignored(Expendable), Directory, WildcardMatch), + ], + "The same as above, but without delete mode, we don't upgrade the status of ignored dot-git entries" ); Ok(()) } @@ -1331,7 +1497,7 @@ fn untracked_and_ignored() -> crate::Result { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).no_match(), entry_nomatch(".gitignore", Pruned, File), entryps("d/d/a", Untracked, File, WildcardMatch), ], @@ -2058,7 +2224,7 @@ fn precious_are_not_expendable() { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitignore", Tracked, File), entry("a.o", Ignored(Expendable), File), entry("d/a", Tracked, File), @@ -2327,7 +2493,7 @@ fn worktree_root_can_be_symlink() -> crate::Result { #[test] fn root_may_not_go_through_dot_git() -> crate::Result { let root = fixture("with-nested-dot-git"); - for dir in ["", "subdir"] { + for (dir, expected_pathspec) in [("", Some(Verbatim)), ("subdir", None)] { let troot = root.join("dir").join(".git").join(dir); let ((out, _root), entries) = collect(&root, Some(&troot), |keep, ctx| { walk(&root, ctx, options_emit_all(), keep) @@ -2342,7 +2508,11 @@ fn root_may_not_go_through_dot_git() -> crate::Result { ); assert_eq!( entries, - [entry_nomatch("dir/.git", DotGit, Directory)], + [{ + let mut e = entry("dir/.git", Pruned, Directory).with_property(DotGit); + e.0.pathspec_match = expected_pathspec; + e + }], "no traversal happened as root passes though .git" ); } @@ -2535,7 +2705,7 @@ fn walk_with_submodule() -> crate::Result { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitmodules", Tracked, File), entry("dir/file", Tracked, File), entry("submodule", Tracked, Repository) @@ -2652,7 +2822,7 @@ fn submodules() -> crate::Result { } ); let expected_content = [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitmodules", Tracked, File).with_index_kind(File), entry("a/b", Tracked, Repository).with_index_kind(Repository), entry("empty", Tracked, File).with_index_kind(File), @@ -2857,7 +3027,7 @@ fn root_with_dir_that_is_tracked_and_ignored() -> crate::Result { assert_eq!( entries, [ - entry_nokind(".git", DotGit), + entry_nokind(".git", Pruned).with_property(DotGit).with_match(Always), entry(".gitignore", Tracked, File), entry("dir/file", Tracked, File) ], @@ -2898,7 +3068,7 @@ fn empty_and_nested_untracked() -> crate::Result { assert_eq!( entries, [ - entry("empty", Untracked, EmptyDirectory), + entry("empty", Untracked, Directory).with_property(EmptyDirectory), entry("untracked/file", Untracked, File) ], "we find all untracked entries, no matter the deletion mode" @@ -2928,7 +3098,7 @@ fn empty_and_nested_untracked() -> crate::Result { assert_eq!( entries, [ - entry("empty", Untracked, EmptyDirectory), + entry("empty", Untracked, Directory).with_property(EmptyDirectory), entry("untracked", Untracked, Directory) ], "we find all untracked directories, no matter the deletion mode" @@ -3395,7 +3565,7 @@ fn untracked_and_ignored_collapse_mix() { #[test] fn root_cannot_pass_through_case_altered_capital_dot_git_if_case_insensitive() -> crate::Result { let root = fixture("with-nested-capitalized-dot-git"); - for dir in ["", "subdir"] { + for (dir, expected_pathspec) in [("", Some(Verbatim)), ("subdir", None)] { let troot = root.join("dir").join(".GIT").join(dir); let ((out, _root), entries) = collect(&root, Some(&troot), |keep, ctx| { walk( @@ -3418,7 +3588,11 @@ fn root_cannot_pass_through_case_altered_capital_dot_git_if_case_insensitive() - ); assert_eq!( entries, - [entry_nomatch("dir/.GIT", DotGit, Directory)], + [{ + let mut e = entry("dir/.GIT", Pruned, Directory).with_property(DotGit); + e.0.pathspec_match = expected_pathspec; + e + }], "no traversal happened as root passes though .git, it compares in a case-insensitive fashion" ); } @@ -3476,7 +3650,9 @@ fn partial_checkout_cone_and_non_one() -> crate::Result { ); assert_eq!( entries, - [entry("d", TrackedExcluded, Directory)], + [entry("d", Pruned, Directory) + .with_index_kind(Directory) + .with_property(TrackedExcluded)], "{fixture_name}: we avoid entering excluded sparse-checkout directories even if they are present on disk,\ no matter with cone or without." ); diff --git a/gix-dir/tests/walk_utils/mod.rs b/gix-dir/tests/walk_utils/mod.rs index aef8ef5bd3c..13ffcb36851 100644 --- a/gix-dir/tests/walk_utils/mod.rs +++ b/gix-dir/tests/walk_utils/mod.rs @@ -51,6 +51,7 @@ pub fn entry_nomatch( Entry { rela_path: rela_path.as_ref().to_owned(), status, + property: None, disk_kind: Some(disk_kind), index_kind: index_kind_from_status(status, disk_kind), pathspec_match: None, @@ -64,6 +65,7 @@ pub fn entry_nokind(rela_path: impl AsRef, status: entry::Status) -> (Entr Entry { rela_path: rela_path.as_ref().to_owned(), status, + property: None, disk_kind: None, index_kind: None, pathspec_match: None, @@ -82,6 +84,7 @@ pub fn entryps( Entry { rela_path: rela_path.as_ref().to_owned(), status, + property: None, disk_kind: Some(disk_kind), index_kind: index_kind_from_status(status, disk_kind), pathspec_match: Some(pathspec_match), @@ -100,6 +103,7 @@ pub fn entry_dirstat( Entry { rela_path: rela_path.as_ref().to_owned(), status, + property: None, disk_kind: Some(disk_kind), index_kind: index_kind_from_status(status, disk_kind), pathspec_match: Some(entry::PathspecMatch::Always), @@ -120,6 +124,7 @@ pub fn entryps_dirstat( Entry { rela_path: rela_path.as_ref().to_owned(), status, + property: None, disk_kind: Some(disk_kind), index_kind: index_kind_from_status(status, disk_kind), pathspec_match: Some(pathspec_match), @@ -129,11 +134,15 @@ pub fn entryps_dirstat( } fn index_kind_from_status(status: entry::Status, disk_kind: entry::Kind) -> Option { - matches!(status, entry::Status::Tracked | entry::Status::TrackedExcluded).then_some(disk_kind) + matches!(status, entry::Status::Tracked).then_some(disk_kind) } pub trait EntryExt { fn with_index_kind(self, index_kind: entry::Kind) -> Self; + fn with_property(self, flags: entry::Property) -> Self; + fn with_match(self, m: entry::PathspecMatch) -> Self; + fn no_match(self) -> Self; + fn no_kind(self) -> Self; } impl EntryExt for (Entry, Option) { @@ -141,6 +150,23 @@ impl EntryExt for (Entry, Option) { self.0.index_kind = index_kind.into(); self } + fn with_property(mut self, property: entry::Property) -> Self { + self.0.property = property.into(); + self + } + fn with_match(mut self, m: entry::PathspecMatch) -> Self { + self.0.pathspec_match = Some(m); + self + } + + fn no_match(mut self) -> Self { + self.0.pathspec_match = None; + self + } + fn no_kind(mut self) -> Self { + self.0.disk_kind = None; + self + } } pub fn collect( From e944e74e17355b071373ad9baa3f91ef3da08c8f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 19 Feb 2024 16:47:43 +0100 Subject: [PATCH 2/7] further improvements to `gix clean` - assure it always signals that the dirwalk is 'for deletion', to have additional CWD checks. - add `--match-entries` flag to apply pathspec later, when filtering entries determined by an unfiltered dirwalk. --- gitoxide-core/src/repository/clean.rs | 33 ++++++++++++++++----------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/gitoxide-core/src/repository/clean.rs b/gitoxide-core/src/repository/clean.rs index 78c83ff6e7d..f3c8c259ea8 100644 --- a/gitoxide-core/src/repository/clean.rs +++ b/gitoxide-core/src/repository/clean.rs @@ -66,10 +66,10 @@ pub(crate) mod function { match skip_hidden_repositories { Some(FindRepository::NonBare) => Some(FindNonBareRepositoriesInIgnoredDirectories), Some(FindRepository::All) => Some(FindRepositoriesInIgnoredDirectories), - None => None, + None => Some(Default::default()), } } else { - Some(IgnoredDirectoriesCanHideNestedRepositories) + Some(Default::default()) }) .classify_untracked_bare_repositories(matches!(find_untracked_repositories, FindRepository::All)) .emit_untracked(collapse_directories) @@ -88,7 +88,7 @@ pub(crate) mod function { let mut pruned_entries = 0; let mut saw_ignored_directory = false; let mut saw_untracked_directory = false; - for (entry, dir_status) in entries.into_iter() { + for (mut entry, dir_status) in entries.into_iter() { if dir_status.is_some() { if debug { writeln!( @@ -106,16 +106,15 @@ pub(crate) mod function { .map_or(false, |m| m != gix::dir::entry::PathspecMatch::Excluded); pruned_entries += usize::from(!pathspec_includes_entry); if !pathspec_includes_entry && debug { - writeln!(err, "DBG: prune '{}' as it is excluded by pathspec", entry.rela_path).ok(); + writeln!(err, "DBG: prune '{}'", entry.rela_path).ok(); } if entry.status.is_pruned() || !pathspec_includes_entry { continue; } - let mut disk_kind = entry.disk_kind.expect("present if not pruned"); let keep = match entry.status { - Status::DotGit | Status::Pruned | Status::TrackedExcluded => { - unreachable!("BUG: Pruned are skipped already as their pathspec is always None") + Status::Pruned => { + unreachable!("BUG: assumption that pruned entries have no pathspec match, but probably not") } Status::Tracked => { unreachable!("BUG: tracked aren't emitted") @@ -130,6 +129,14 @@ pub(crate) mod function { } Status::Untracked => true, }; + if entry.disk_kind.is_none() { + entry.disk_kind = workdir + .join(gix::path::from_bstr(entry.rela_path.as_bstr())) + .metadata() + .ok() + .map(|e| e.file_type().into()); + } + let mut disk_kind = entry.disk_kind.expect("present if not pruned"); if !keep { if debug { writeln!(err, "DBG: prune '{}' as -x or -p is missing", entry.rela_path).ok(); @@ -148,7 +155,7 @@ pub(crate) mod function { match disk_kind { Kind::File | Kind::Symlink => {} - Kind::EmptyDirectory | Kind::Directory => { + Kind::Directory => { if !directories { skipped_directories += 1; if debug { @@ -202,15 +209,15 @@ pub(crate) mod function { }, maybe = if execute { "removing" } else { "WOULD remove" }, suffix = match disk_kind { - Kind::File | Kind::Symlink | Kind::Directory => { - "" - } - Kind::EmptyDirectory => { + Kind::Directory if entry.property == Some(gix::dir::entry::Property::EmptyDirectory) => { " empty" } Kind::Repository => { " repository" } + Kind::File | Kind::Symlink | Kind::Directory => { + "" + } }, )?; @@ -256,7 +263,7 @@ pub(crate) mod function { })); messages.extend((pruned_entries > 0 && has_patterns).then(|| { format!( - "try to adjust your pathspec to reveal some of the {pruned_entries} pruned {entries}", + "try to adjust your pathspec to reveal some of the {pruned_entries} pruned {entries} - show with --debug", entries = plural("entry", "entries", pruned_entries) ) })); From 1a26732fe897161f9bfa397efdb07aa57f3c7341 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 24 Feb 2024 17:12:41 +0100 Subject: [PATCH 3/7] assure that we don't artificially make non-recursable directories visible A repository may be emitted on its own accord, without having to go through the folding logic which filters directories that have been 'prefix' matched. --- gix-dir/src/entry.rs | 2 +- gix-dir/src/walk/classify.rs | 11 ++++------- gix-dir/tests/fixtures/many.sh | 7 +++++++ gix-dir/tests/walk/mod.rs | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/gix-dir/src/entry.rs b/gix-dir/src/entry.rs index dfd65ffc1f6..2bd0c960bca 100644 --- a/gix-dir/src/entry.rs +++ b/gix-dir/src/entry.rs @@ -191,7 +191,7 @@ impl Status { } impl Kind { - fn is_recursable_dir(&self) -> bool { + pub(super) fn is_recursable_dir(&self) -> bool { matches!(self, Kind::Directory) } diff --git a/gix-dir/src/walk/classify.rs b/gix-dir/src/walk/classify.rs index d23f60d0d9a..4b89911d3d1 100644 --- a/gix-dir/src/walk/classify.rs +++ b/gix-dir/src/walk/classify.rs @@ -253,13 +253,6 @@ pub fn path( .map_err(Error::ExcludesAccess)? { if emit_ignored.is_some() { - if kind.map_or(false, |d| d.is_dir()) && out.pathspec_match.is_none() { - // we have patterns that didn't match at all. Try harder. - out.pathspec_match = ctx - .pathspec - .directory_matches_prefix(rela_path.as_bstr(), true) - .then_some(PathspecMatch::Prefix); - } if matches!( for_deletion, Some( @@ -275,6 +268,10 @@ pub fn path( ), ); } + if kind.map_or(false, |d| d.is_recursable_dir()) && out.pathspec_match.is_none() { + // we have patterns that didn't match at all, *yet*. We want to look inside. + out.pathspec_match = Some(PathspecMatch::Prefix); + } } return Ok(out .with_status(entry::Status::Ignored(excluded)) diff --git a/gix-dir/tests/fixtures/many.sh b/gix-dir/tests/fixtures/many.sh index 4e909af022f..a4ee18dbfea 100644 --- a/gix-dir/tests/fixtures/many.sh +++ b/gix-dir/tests/fixtures/many.sh @@ -159,6 +159,13 @@ git init partial-checkout-non-cone mkdir d && touch d/file-created-manually ) +git init precious-nested-repository +(cd precious-nested-repository + echo '$precious*/' > .gitignore + git init precious-repo + git add .gitignore && git commit -m "init" +) + git init only-untracked (cd only-untracked >a diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index 91c5518ed03..c17a145a252 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -1208,6 +1208,38 @@ fn untracked_and_ignored_for_deletion_nonmatching_wildcard_spec() -> crate::Resu ); Ok(()) } +#[test] +fn nested_precious_repo_respects_wildcards() -> crate::Result { + let root = fixture("precious-nested-repository"); + for for_deletion in [ + Some(ForDeletionMode::FindNonBareRepositoriesInIgnoredDirectories), + Some(ForDeletionMode::FindRepositoriesInIgnoredDirectories), + ] { + let (_out, entries) = collect_filtered( + &root, + None, + |keep, ctx| { + walk( + &root, + ctx, + walk::Options { + emit_ignored: Some(CollapseDirectory), + emit_untracked: CollapseDirectory, + emit_pruned: false, + for_deletion, + ..options() + }, + keep, + ) + }, + Some("*foo/"), + ); + // NOTE: do not use `_out` as `.git` directory contents can change, it's controlled by Git, causing flakiness. + + assert_eq!(entries, [], "nothing matches, of course"); + } + Ok(()) +} #[test] fn nested_ignored_dirs_for_deletion_nonmatching_wildcard_spec() -> crate::Result { From 9863d75147445d3a598fe1339d88c353850a5984 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 20 Feb 2024 19:33:11 +0100 Subject: [PATCH 4/7] feat: add `gix clean --patterns-for-entries|-m` to help with wildcards. --- gitoxide-core/src/repository/clean.rs | 40 +++++++++++++++++++++++---- src/plumbing/main.rs | 2 ++ src/plumbing/options/mod.rs | 6 ++++ 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/gitoxide-core/src/repository/clean.rs b/gitoxide-core/src/repository/clean.rs index f3c8c259ea8..1eea6ddd378 100644 --- a/gitoxide-core/src/repository/clean.rs +++ b/gitoxide-core/src/repository/clean.rs @@ -15,6 +15,7 @@ pub struct Options { pub precious: bool, pub directories: bool, pub repositories: bool, + pub pathspec_matches_result: bool, pub skip_hidden_repositories: Option, pub find_untracked_repositories: FindRepository, } @@ -46,6 +47,7 @@ pub(crate) mod function { repositories, skip_hidden_repositories, find_untracked_repositories, + pathspec_matches_result, }: Options, ) -> anyhow::Result<()> { if format != OutputFormat::Human { @@ -56,6 +58,7 @@ pub(crate) mod function { }; let index = repo.index_or_empty()?; + let pathspec_for_dirwalk = !pathspec_matches_result; let has_patterns = !patterns.is_empty(); let mut collect = InterruptableCollect::default(); let collapse_directories = CollapseDirectory; @@ -76,9 +79,29 @@ pub(crate) mod function { .emit_ignored(Some(collapse_directories)) .empty_patterns_match_prefix(true) .emit_empty_directories(true); - repo.dirwalk(&index, patterns, options, &mut collect)?; - let prefix = repo.prefix()?.unwrap_or(Path::new("")); + repo.dirwalk( + &index, + if pathspec_for_dirwalk { + patterns.clone() + } else { + Vec::new() + }, + options, + &mut collect, + )?; + let mut pathspec = pathspec_matches_result + .then(|| { + repo.pathspec( + true, + patterns, + true, + &index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + ) + }) + .transpose()?; + let prefix = repo.prefix()?.unwrap_or(Path::new("")); let entries = collect.inner.into_entries_by_path(); let mut entries_to_clean = 0; let mut skipped_directories = 0; @@ -101,9 +124,14 @@ pub(crate) mod function { continue; } - let pathspec_includes_entry = entry - .pathspec_match - .map_or(false, |m| m != gix::dir::entry::PathspecMatch::Excluded); + let pathspec_includes_entry = match pathspec.as_mut() { + None => entry + .pathspec_match + .map_or(false, |m| m != gix::dir::entry::PathspecMatch::Excluded), + Some(pathspec) => pathspec + .pattern_matching_relative_path(entry.rela_path.as_bstr(), entry.disk_kind.map(|k| k.is_dir())) + .map_or(false, |m| !m.is_excluded()), + }; pruned_entries += usize::from(!pathspec_includes_entry); if !pathspec_includes_entry && debug { writeln!(err, "DBG: prune '{}'", entry.rela_path).ok(); @@ -114,7 +142,7 @@ pub(crate) mod function { let keep = match entry.status { Status::Pruned => { - unreachable!("BUG: assumption that pruned entries have no pathspec match, but probably not") + unreachable!("BUG: we skipped these above") } Status::Tracked => { unreachable!("BUG: tracked aren't emitted") diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index b2aa0e92214..32dc404cffa 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -155,6 +155,7 @@ pub fn main() -> Result<()> { directories, pathspec, repositories, + pathspec_matches_result, skip_hidden_repositories, find_untracked_repositories, }) => prepare_and_run( @@ -178,6 +179,7 @@ pub fn main() -> Result<()> { precious, directories, repositories, + pathspec_matches_result, skip_hidden_repositories: skip_hidden_repositories.map(Into::into), find_untracked_repositories: find_untracked_repositories.into(), }, diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 91844b330ff..a437bf641c4 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -521,6 +521,12 @@ pub mod clean { /// Remove nested repositories. #[arg(long, short = 'r')] pub repositories: bool, + /// Pathspec patterns are used to match the result of the dirwalk, not the dirwalk itself. + /// + /// Use this if there is trouble using wildcard pathspecs, which affect the directory walk + /// in reasonable, but often unexpected ways. + #[arg(long, short = 'm')] + pub pathspec_matches_result: bool, /// Enter ignored directories to skip repositories contained within. #[arg(long)] pub skip_hidden_repositories: Option, From 8959b2153f32c2fba599e3dfa1720e155b462b94 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 25 Feb 2024 10:04:37 +0100 Subject: [PATCH 5/7] Allow the `-n` argument as no-op to make them more compatible. --- src/plumbing/main.rs | 1 + src/plumbing/options/mod.rs | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 32dc404cffa..aeb589c584f 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -149,6 +149,7 @@ pub fn main() -> Result<()> { #[cfg(feature = "gitoxide-core-tools-clean")] Subcommands::Clean(crate::plumbing::options::clean::Command { debug, + dry_run: _, execute, ignored, precious, diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index a437bf641c4..690ea56ecaa 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -274,7 +274,6 @@ pub mod corpus { } pub mod config { - use gix::bstr::BString; /// Print all entries in a configuration file or access other sub-commands @@ -506,6 +505,9 @@ pub mod clean { /// Print additional debug information to help understand decisions it made. #[arg(long)] pub debug: bool, + /// A dummy to easy with muscle-memory. This flag is assumed if provided or not, and has no effect. + #[arg(short = 'n', long)] + pub dry_run: bool, /// Actually perform the operation, which deletes files on disk without chance of recovery. #[arg(long, short = 'e')] pub execute: bool, @@ -689,6 +691,7 @@ pub mod revision { DiffOrGit, } } + #[derive(Debug, clap::Subcommand)] #[clap(visible_alias = "rev", visible_alias = "r")] pub enum Subcommands { @@ -859,7 +862,6 @@ pub mod index { } pub mod submodule { - #[derive(Debug, clap::Parser)] pub struct Platform { #[clap(subcommand)] From 4d5767cd394d755104aa7f0c1ed5b8e01bf74b12 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 25 Feb 2024 10:23:11 +0100 Subject: [PATCH 6/7] Make it even harder to remove your own CWD --- gix-dir/src/entry.rs | 6 +++ gix-dir/src/walk/readdir.rs | 15 ++++--- gix-dir/tests/fixtures/many.sh | 8 ++++ gix-dir/tests/walk/mod.rs | 80 ++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 6 deletions(-) diff --git a/gix-dir/src/entry.rs b/gix-dir/src/entry.rs index 2bd0c960bca..81c6fae5204 100644 --- a/gix-dir/src/entry.rs +++ b/gix-dir/src/entry.rs @@ -9,6 +9,12 @@ pub enum Property { DotGit, /// The entry is a directory, and that directory is empty. EmptyDirectory, + /// The entry is a directory, it is empty and the current working directory. + /// + /// The caller should pay special attention to this very special case, as it is indeed only possible to run into it + /// while traversing the directory for deletion. + /// Non-empty directory will never be collapsed, hence if they are working directories, they naturally become unobservable. + EmptyDirectoryAndCWD, /// Always in conjunction with a directory on disk that is also known as cone-mode sparse-checkout exclude marker /// - i.e. a directory that is excluded, so its whole content is excluded and not checked out nor is part of the index. /// diff --git a/gix-dir/src/walk/readdir.rs b/gix-dir/src/walk/readdir.rs index 0d28036444d..05bc5a93301 100644 --- a/gix-dir/src/walk/readdir.rs +++ b/gix-dir/src/walk/readdir.rs @@ -94,6 +94,7 @@ pub(super) fn recursive( num_entries, state, &mut prevent_collapse, + current, current_bstr.as_bstr(), current_info, opts, @@ -157,7 +158,7 @@ impl State { pub(super) fn emit_remaining( &mut self, - is_top_level: bool, + may_collapse: bool, opts: Options, out: &mut walk::Outcome, delegate: &mut dyn walk::Delegate, @@ -168,7 +169,7 @@ impl State { _ = Mark { start_index: 0, - may_collapse: is_top_level, + may_collapse, } .emit_all_held(self, opts, out, delegate); } @@ -186,6 +187,7 @@ impl Mark { num_entries: usize, state: &mut State, prevent_collapse: &mut bool, + dir_path: &Path, dir_rela_path: &BStr, dir_info: classify::Outcome, opts: Options, @@ -195,19 +197,20 @@ impl Mark { ) -> walk::Action { if num_entries == 0 { let empty_info = classify::Outcome { - property: if num_entries == 0 { + property: { assert_ne!( dir_info.disk_kind, Some(entry::Kind::Repository), "BUG: it shouldn't be possible to classify an empty dir as repository" ); - if dir_info.property.is_none() { + if !state.may_collapse(dir_path) { + *prevent_collapse = true; + entry::Property::EmptyDirectoryAndCWD.into() + } else if dir_info.property.is_none() { entry::Property::EmptyDirectory.into() } else { dir_info.property } - } else { - dir_info.property }, pathspec_match: ctx .pathspec diff --git a/gix-dir/tests/fixtures/many.sh b/gix-dir/tests/fixtures/many.sh index a4ee18dbfea..0c17c4d3a3b 100644 --- a/gix-dir/tests/fixtures/many.sh +++ b/gix-dir/tests/fixtures/many.sh @@ -178,6 +178,14 @@ git init only-untracked >c ) +git init ignored-with-empty +(cd ignored-with-empty + echo "/target/" >> .gitignore + git add .gitignore && git commit -m "init" + mkdir -p target/empty target/debug target/release + touch target/debug/a target/release/b +) + cp -R only-untracked subdir-untracked (cd subdir-untracked git add . diff --git a/gix-dir/tests/walk/mod.rs b/gix-dir/tests/walk/mod.rs index c17a145a252..823bf360370 100644 --- a/gix-dir/tests/walk/mod.rs +++ b/gix-dir/tests/walk/mod.rs @@ -357,6 +357,86 @@ fn ignored_dir_with_cwd_handling() -> crate::Result { Ok(()) } +#[test] +fn ignored_with_cwd_handling() -> crate::Result { + let root = gix_path::realpath(fixture("ignored-with-empty"))?; + let ((out, _root), entries) = collect_filtered_with_cwd( + &root, + None, + None, + |keep, ctx| { + walk( + &root, + ctx, + walk::Options { + for_deletion: Some(Default::default()), + emit_ignored: Some(CollapseDirectory), + emit_empty_directories: true, + ..options() + }, + keep, + ) + }, + None::<&str>, + ); + + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 1, + returned_entries: entries.len(), + seen_entries: 3, + } + ); + + assert_eq!( + entries, + [entry("target", Ignored(Expendable), Directory),], + "the baseline shows the content" + ); + + let ((out, _root), entries) = collect_filtered_with_cwd( + &root, + Some(&root), + Some("target/empty"), + |keep, ctx| { + walk( + &root, + ctx, + walk::Options { + for_deletion: Some(Default::default()), + emit_ignored: Some(CollapseDirectory), + emit_empty_directories: true, + ..options() + }, + keep, + ) + }, + Some("target"), + ); + + assert_eq!( + out, + walk::Outcome { + read_dir_calls: 5, + returned_entries: entries.len(), + seen_entries: 7, + } + ); + + assert_eq!( + entries, + [ + entryps("target/debug", Ignored(Expendable), Directory, Prefix), + entryps("target/empty", Ignored(Expendable), Directory, Prefix).with_property(EmptyDirectoryAndCWD), + entryps("target/release", Ignored(Expendable), Directory, Prefix), + ], + "it detects empty as CWD (very special case) and lists it as usual, while also preventing collapse to assure \ + to not accidentally end up trying to delete a parent directory" + ); + Ok(()) +} + #[test] fn only_untracked_with_cwd_handling() -> crate::Result { let root = fixture("only-untracked"); From aa7c1908b82e3e23859a4c663faa40ec54611919 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 25 Feb 2024 15:06:50 +0100 Subject: [PATCH 7/7] adapt to changes in `gix-dir` --- gitoxide-core/src/repository/clean.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/gitoxide-core/src/repository/clean.rs b/gitoxide-core/src/repository/clean.rs index 1eea6ddd378..fe91e3d2b38 100644 --- a/gitoxide-core/src/repository/clean.rs +++ b/gitoxide-core/src/repository/clean.rs @@ -210,6 +210,11 @@ pub(crate) mod function { saw_ignored_directory |= is_ignored; saw_untracked_directory |= entry.status == gix::dir::entry::Status::Untracked; } + + if gix::interrupt::is_triggered() { + execute = false; + } + let mut may_remove_this_entry = execute; writeln!( out, "{maybe}{suffix} {}{} {status}", @@ -235,7 +240,18 @@ pub(crate) mod function { "".into() }, }, - maybe = if execute { "removing" } else { "WOULD remove" }, + maybe = if entry.property == Some(gix::dir::entry::Property::EmptyDirectoryAndCWD) { + may_remove_this_entry = false; + if execute { + "Refusing to remove empty current working directory" + } else { + "Would refuse to remove empty current working directory" + } + } else if execute { + "removing" + } else { + "WOULD remove" + }, suffix = match disk_kind { Kind::Directory if entry.property == Some(gix::dir::entry::Property::EmptyDirectory) => { " empty" @@ -249,10 +265,7 @@ pub(crate) mod function { }, )?; - if gix::interrupt::is_triggered() { - execute = false; - } - if execute { + if may_remove_this_entry { let path = workdir.join(entry_path); if disk_kind.is_dir() { std::fs::remove_dir_all(path)?;