Skip to content

Commit

Permalink
feat!: add status::Platform::into_iter() for obtaining a complete s…
Browse files Browse the repository at this point in the history
…tatus.

Note that it is still possible to disable the head-index status.

Types moved around, effectively removing the `iter::` module for most
of the more general types, i.e. those that are quite generically useful in
a status.
  • Loading branch information
Byron committed Jan 2, 2025
1 parent a6f397f commit 5b8140f
Show file tree
Hide file tree
Showing 8 changed files with 524 additions and 385 deletions.
2 changes: 1 addition & 1 deletion gitoxide-core/src/repository/status.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::bail;
use gix::bstr::{BStr, BString, ByteSlice};
use gix::status::index_worktree::iter::Item;
use gix::status::index_worktree::Item;
use gix_status::index_as_worktree::{Change, Conflict, EntryStatus};
use std::path::Path;

Expand Down
484 changes: 112 additions & 372 deletions gix/src/status/index_worktree.rs

Large diffs are not rendered by default.

244 changes: 244 additions & 0 deletions gix/src/status/iter/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
use crate::bstr::BString;
use crate::config::cache::util::ApplyLeniencyDefault;
use crate::status::index_worktree::BuiltinSubmoduleStatus;
use crate::status::{index_worktree, Platform};
use crate::worktree::IndexPersistedOrInMemory;
use gix_status::index_as_worktree::{Change, EntryStatus};

pub(super) mod types;
use types::{ApplyChange, Item, Iter, Outcome};

/// Lifecycle
impl<Progress> Platform<'_, Progress>
where
Progress: gix_features::progress::Progress,
{
/// Turn the platform into an iterator for changes between the head-tree and the index, and the index and the working tree,
/// while optionally listing untracked and/or ignored files.
///
/// * `patterns`
/// - Optional patterns to use to limit the paths to look at. If empty, all paths are considered.
///
/// ### Errors
///
/// Fails if the index can't be loaded, if the `index.skipHash` configuration is invalid (unless lenient),
/// if the submodule status helper can't be created, or - with the `parallel` feature - if the producer
/// thread can't be spawned. Without `parallel`, errors of the status operation itself are returned here as well,
/// as everything is computed up-front.
///
/// ### Panics
///
/// Without the `parallel` feature, this currently hits a `todo!()` if a head-tree is configured.
#[doc(alias = "diff_index_to_workdir", alias = "git2")]
pub fn into_iter(
self,
patterns: impl IntoIterator<Item = BString>,
) -> Result<Iter, crate::status::into_iter::Error> {
// Prefer the index configured on the platform, and fall back to the persisted one (or an empty one) otherwise.
let index = match self.index {
None => IndexPersistedOrInMemory::Persisted(self.repo.index_or_empty()?),
Some(index) => index,
};

// Resolve the tree to compare the index against:
// `Some(None)` looks up the tree of `HEAD`, `Some(Some(id))` uses the given tree,
// and `None` yields no tree at all.
// NOTE(review): this closure is only called in the non-`parallel` branch below, the `parallel` branch
// doesn't compute the tree-index status yet.
let obtain_tree_id = || -> Result<Option<gix_hash::ObjectId>, crate::status::into_iter::Error> {
Ok(match self.head_tree {
Some(None) => Some(self.repo.head_tree_id()?.into()),
Some(Some(tree_id)) => Some(tree_id.into()),
None => None,
})
};

// Read `index.skipHash` from the configuration; it's kept in the outcome and used when writing the index back.
let skip_hash = self
.repo
.config
.resolved
.boolean(crate::config::tree::Index::SKIP_HASH)
.map(|res| crate::config::tree::Index::SKIP_HASH.enrich_error(res))
.transpose()
.with_lenient_default(self.repo.config.lenient_config)?
.unwrap_or_default();
let should_interrupt = self.should_interrupt.clone().unwrap_or_default();
let submodule = BuiltinSubmoduleStatus::new(self.repo.clone().into_sync(), self.submodules)?;
#[cfg(feature = "parallel")]
{
// The status operation runs on its own thread and sends each item through the channel,
// while the final `Outcome` is the thread's return value, to be picked up when joining it.
let (tx, rx) = std::sync::mpsc::channel();
let mut collect = Collect { tx };
// Collect the patterns so they can be moved into the thread.
let patterns: Vec<_> = patterns.into_iter().collect();
let join = std::thread::Builder::new()
.name("gix::status::index_worktree::producer".into())
.spawn({
let repo = self.repo.clone().into_sync();
let options = self.index_worktree_options;
let should_interrupt = should_interrupt.clone();
let mut progress = self.progress;
move || -> Result<_, index_worktree::Error> {
let repo = repo.to_thread_local();
let out = repo.index_worktree_status(
&index,
patterns,
&mut collect,
gix_status::index_as_worktree::traits::FastEq,
submodule,
&mut progress,
&should_interrupt,
options,
)?;
// `changes` is filled in by the iterator once it has seen all items.
Ok(Outcome {
index_worktree: out,
index,
changes: None,
skip_hash,
})
}
})
.map_err(crate::status::into_iter::Error::SpawnThread)?;

Ok(Iter {
rx_and_join: Some((rx, join)),
should_interrupt,
index_changes: Vec::new(),
out: None,
})
}
#[cfg(not(feature = "parallel"))]
{
// Without threads, the whole status is computed right here and buffered in `collect`.
let mut collect = Collect { items: Vec::new() };

let repo = self.repo.clone().into_sync();
let options = self.index_worktree_options;
let mut progress = self.progress;
let repo = repo.to_thread_local();
// NOTE(review): the tree-index comparison isn't implemented yet, so this panics via `todo!()`
// whenever a tree id is available.
let items = match obtain_tree_id()? {
Some(tree_id) => {
// self.repo.tree_index_status(&tree_id);
todo!()
}
None => Vec::new().into_iter(),
};
let out = repo.index_worktree_status(
&index,
patterns,
&mut collect,
gix_status::index_as_worktree::traits::FastEq,
submodule,
&mut progress,
&should_interrupt,
options,
)?;
let mut iter = Iter {
items,
index_changes: Vec::new(),
out: None,
};
let mut out = Outcome {
index_worktree: out,
index,
changes: None,
skip_hash,
};
// Separate index-only changes from the items to yield; the former end up in `iter.index_changes`.
let items = collect
.items
.into_iter()
.filter_map(|item| iter.maybe_keep_index_change(item))
.collect::<Vec<_>>();
// Only store changes in the outcome if there are any.
out.changes = (!iter.index_changes.is_empty()).then(|| std::mem::take(&mut iter.index_changes));
iter.items = items.into_iter();
// The outcome is available right away as the operation is already complete.
iter.out = Some(out);
Ok(iter)
}
}
}

/// The error yielded by [`Iter`] in place of an item, which is an alias for [`index_worktree::Error`].
pub type Error = index_worktree::Error;

impl Iterator for Iter {
    type Item = Result<Item, Error>;

    /// Return the next status item, or `None` once the operation is complete.
    ///
    /// With the `parallel` feature, this blocks until the producer thread sends an item or terminates.
    /// Items that merely represent index updates are recorded and not yielded. Once the channel is closed,
    /// the thread is joined: on success its outcome is stored for retrieval via [`Iter::outcome_mut()`] or
    /// [`Iter::into_outcome()`], on failure the error is yielded as last item.
    ///
    /// Without the `parallel` feature, the pre-computed items are yielded one by one.
    fn next(&mut self) -> Option<Self::Item> {
        #[cfg(feature = "parallel")]
        loop {
            // Once the channel and handle are gone, the iteration has finished for good.
            let (rx, _join) = self.rx_and_join.as_ref()?;
            match rx.recv().ok() {
                Some(item) => {
                    // Index-only changes are kept internally, so keep receiving until there is something to yield.
                    if let Some(item) = self.maybe_keep_index_change(item) {
                        break Some(Ok(item));
                    }
                }
                None => {
                    // The sender was dropped, hence the producer is done and joining won't block for long.
                    let (_rx, handle) = self.rx_and_join.take()?;
                    break match handle.join().expect("no panic") {
                        Ok(mut out) => {
                            // Only record changes if there are any, exactly like the non-parallel code path does.
                            // Otherwise `Outcome::write_changes()` would rewrite the index without anything to apply.
                            out.changes =
                                (!self.index_changes.is_empty()).then(|| std::mem::take(&mut self.index_changes));
                            self.out = Some(out);
                            None
                        }
                        Err(err) => Some(Err(err)),
                    };
                }
            }
        }
        #[cfg(not(feature = "parallel"))]
        self.items.next().map(Ok)
    }
}

/// Access
impl Iter {
    /// Provide mutable access to the outcome of the iteration, which is `None` as long as the iterator
    /// wasn't fully consumed.
    pub fn outcome_mut(&mut self) -> Option<&mut Outcome> {
        Option::as_mut(&mut self.out)
    }

    /// Consume the iterator and return the outcome of the iteration. It is `None` if the iteration
    /// failed or if it didn't run to completion.
    pub fn into_outcome(mut self) -> Option<Outcome> {
        std::mem::take(&mut self.out)
    }
}

impl Iter {
    /// Filter out items that only tell us to update an index entry.
    ///
    /// Such items are recorded in `index_changes` along with their entry index and `None` is returned,
    /// while all other items are handed back to the caller unchanged.
    fn maybe_keep_index_change(&mut self, item: Item) -> Option<Item> {
        let (entry_index, change) = match item {
            // The entry's stat information is outdated and should be replaced.
            Item::IndexWorktree(index_worktree::Item::Modification {
                entry_index,
                status: EntryStatus::NeedsUpdate(stat),
                ..
            }) => (entry_index, ApplyChange::NewStat(stat)),
            // The modification asks for the entry's size to be zeroed.
            Item::IndexWorktree(index_worktree::Item::Modification {
                entry_index,
                status:
                    EntryStatus::Change(Change::Modification {
                        set_entry_stat_size_zero: true,
                        ..
                    }),
                ..
            }) => (entry_index, ApplyChange::SetSizeToZero),
            other => return Some(other),
        };

        self.index_changes.push((entry_index, change));
        None
    }
}

#[cfg(feature = "parallel")]
impl Drop for Iter {
    /// Pass the receiver and thread handle, if still present, to the shared drop helper
    /// along with the interrupt flag.
    fn drop(&mut self) {
        let rx_and_join = self.rx_and_join.take();
        crate::util::parallel_iter_drop(rx_and_join, &self.should_interrupt);
    }
}

/// The visitor handed to the status operation, which turns each visited entry into an [`Item`].
struct Collect {
/// With the `parallel` feature, items are sent through this channel as they are produced.
#[cfg(feature = "parallel")]
tx: std::sync::mpsc::Sender<Item>,
/// Without the `parallel` feature, items are buffered here instead.
#[cfg(not(feature = "parallel"))]
items: Vec<Item>,
}

impl<'index> gix_status::index_as_worktree_with_renames::VisitEntry<'index> for Collect {
    type ContentChange =
        <gix_status::index_as_worktree::traits::FastEq as gix_status::index_as_worktree::traits::CompareBlobs>::Output;
    type SubmoduleStatus = <BuiltinSubmoduleStatus as gix_status::index_as_worktree::traits::SubmoduleStatus>::Output;

    /// Convert `entry` into an [`Item`] and either send it through the channel (with `parallel`),
    /// or buffer it (without `parallel`).
    fn visit_entry(
        &mut self,
        entry: gix_status::index_as_worktree_with_renames::Entry<'index, Self::ContentChange, Self::SubmoduleStatus>,
    ) {
        #[cfg(feature = "parallel")]
        {
            // NOTE: a failed send is ignored on purpose - we assume that the receiver triggers interruption
            //       so the operation will stop if the receiver is down.
            let _ = self.tx.send(Item::IndexWorktree(entry.into()));
        }
        #[cfg(not(feature = "parallel"))]
        {
            self.items.push(Item::IndexWorktree(entry.into()));
        }
    }
}
117 changes: 117 additions & 0 deletions gix/src/status/iter/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
use crate::status::index_worktree;
use crate::worktree::IndexPersistedOrInMemory;

/// An iterator for changes between the index and the worktree and the head-tree and the index.
///
/// Note that depending on the underlying configuration, there might be a significant delay until the first
/// item is received due to the buffering necessary to perform rename tracking and/or sorting.
///
/// ### Submodules
///
/// Note that submodules can be set to 'inactive', which will not exclude them from the status operation, similar to
/// how `git status` includes them.
///
/// ### Index Changes
///
/// Changes to the index are collected, and it's possible to write the index back using [Outcome::write_changes()].
/// Note that these changes are not observable, they will always be kept.
///
/// ### Parallel Operation
///
/// Note that without the `parallel` feature, the iterator becomes 'serial', which means all status will be computed in advance
/// and it's non-interruptible, yielding worse performance for is-dirty checks for instance as interruptions won't happen.
/// It's a crutch that is just there to make single-threaded applications possible at all, as it's not really an iterator
/// anymore. If this matters, better run [Repository::index_worktree_status()](crate::Repository::index_worktree_status) by hand
/// as it provides all control one would need, just not as an iterator.
///
/// Also, even with `parallel` set, the first call to `next()` will block until there is an item available, without a chance
/// to interrupt unless [`status::Platform::should_interrupt_*()`](crate::status::Platform::should_interrupt_shared()) was
/// configured.
pub struct Iter {
/// The channel to receive items from, along with the handle of the thread that produces them and
/// which returns the [`Outcome`] when done. It's `None` once the thread was joined.
#[cfg(feature = "parallel")]
#[allow(clippy::type_complexity)]
pub(super) rx_and_join: Option<(
std::sync::mpsc::Receiver<Item>,
std::thread::JoinHandle<Result<Outcome, index_worktree::Error>>,
)>,
/// The interrupt flag shared with the producer thread, which is also passed to the drop helper when the iterator is dropped.
#[cfg(feature = "parallel")]
pub(super) should_interrupt: crate::status::OwnedOrStaticAtomicBool,
/// Without parallelization, the iterator has to buffer all changes in advance.
#[cfg(not(feature = "parallel"))]
pub(super) items: std::vec::IntoIter<Item>,
/// The outcome of the operation, only available once the operation has ended.
pub(in crate::status) out: Option<Outcome>,
/// The set of `(entry_index, change)` we extracted in order to potentially write back the index with the changes applied.
pub(super) index_changes: Vec<(usize, ApplyChange)>,
}

/// The item produced by the iterator.
#[derive(Clone, PartialEq, Debug)]
pub enum Item {
/// A change between the index and the worktree.
///
/// Note that untracked changes are also collected here.
IndexWorktree(index_worktree::Item),
/// A change between the tree of `HEAD` and the index.
///
/// Note that this variant doesn't carry any data yet.
TreeIndex,
}

/// The data the thread sends over to the receiving iterator.
pub struct Outcome {
/// The outcome of the index-to-worktree comparison operation.
pub index_worktree: gix_status::index_as_worktree_with_renames::Outcome,
/// The index that was used for the operation.
pub index: IndexPersistedOrInMemory,
/// The value to use for the `skip_hash` option when writing the index back.
pub(super) skip_hash: bool,
/// The `(entry_index, change)` pairs to apply to the index before writing it back, if there are any.
/// They are consumed when written.
pub(super) changes: Option<Vec<(usize, ApplyChange)>>,
}

impl Outcome {
    /// Returns `true` if changes to the index were collected that are not yet applied, and which *should*
    /// be written back.
    ///
    /// Not writing them back makes subsequent `status` operations take longer to complete, as the additional
    /// work that led to these changes has to be repeated each time.
    pub fn has_changes(&self) -> bool {
        matches!(&self.changes, Some(changes) if !changes.is_empty())
    }

    /// Apply the collected changes to a copy of the index and write it back to the index file.
    ///
    /// Returns `None` if there are no collected changes, which is also the case on subsequent calls
    /// as the changes are consumed in the process.
    pub fn write_changes(&mut self) -> Option<Result<(), gix_index::file::write::Error>> {
        let _span = gix_features::trace::coarse!("gix::status::index_worktree::Outcome::write_changes()");
        let pending = self.changes.take()?;
        // Work on a copy so the index used for the status operation remains untouched.
        let mut index = match &self.index {
            IndexPersistedOrInMemory::InMemory(in_memory) => in_memory.clone(),
            IndexPersistedOrInMemory::Persisted(persisted) => (***persisted).clone(),
        };

        let entries = index.entries_mut();
        for (entry_index, change) in pending {
            let stat = &mut entries[entry_index].stat;
            match change {
                ApplyChange::NewStat(new_stat) => *stat = new_stat,
                ApplyChange::SetSizeToZero => stat.size = 0,
            }
        }

        let options = crate::index::write::Options {
            extensions: Default::default(),
            skip_hash: self.skip_hash,
        };
        Some(index.write(options))
    }
}

/// A change to apply to the stat information of an index entry before writing the index back.
pub(super) enum ApplyChange {
/// Set the size stored in the entry's stat information to zero.
SetSizeToZero,
/// Replace the entry's stat information with the given one.
NewStat(crate::index::entry::Stat),
}

impl From<index_worktree::Item> for Item {
    /// Wrap an index-worktree item into the corresponding variant of the general status item.
    fn from(index_worktree_item: index_worktree::Item) -> Self {
        Self::IndexWorktree(index_worktree_item)
    }
}
Loading

0 comments on commit 5b8140f

Please sign in to comment.