From c30ac0c9f89fb4480c8f8c8c0c06fa046dcd4314 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 13 Aug 2023 16:02:36 +0200 Subject: [PATCH] feat: `gix index entries` with styles and pathspecs. This adds support for more simple git style, which is faster and thus allows for more direct comparisons to `git ls-files`. --- .gitattributes | 2 +- gitoxide-core/src/query/engine/command.rs | 2 +- .../src/repository/attributes/query.rs | 2 +- gitoxide-core/src/repository/exclude.rs | 2 +- gitoxide-core/src/repository/index/entries.rs | 205 +++++++++++++----- src/plumbing/main.rs | 19 +- src/plumbing/options/mod.rs | 22 +- 7 files changed, 185 insertions(+), 69 deletions(-) diff --git a/.gitattributes b/.gitattributes index 327b9259902..f44093efc92 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,5 @@ **/generated-archives/*.tar.xz filter=lfs diff=lfs merge=lfs -text # assure line feeds don't interfere with our working copy hash -**/tests/fixtures/*.sh text crlf=input eol=lf +**/tests/fixtures/**/*.sh text crlf=input eol=lf /justfile text crlf=input eol=lf diff --git a/gitoxide-core/src/query/engine/command.rs b/gitoxide-core/src/query/engine/command.rs index b0021663e18..37218c45e6b 100644 --- a/gitoxide-core/src/query/engine/command.rs +++ b/gitoxide-core/src/query/engine/command.rs @@ -21,7 +21,7 @@ impl query::Engine { let is_excluded = spec.is_excluded(); let relpath = spec .normalize( - self.repo.prefix().transpose()?.unwrap_or_default().as_ref(), + self.repo.prefix()?.unwrap_or_default().as_ref(), self.repo.work_dir().unwrap_or_else(|| self.repo.git_dir()), )? 
.path(); diff --git a/gitoxide-core/src/repository/attributes/query.rs b/gitoxide-core/src/repository/attributes/query.rs index 70627842d5a..9006edf7a26 100644 --- a/gitoxide-core/src/repository/attributes/query.rs +++ b/gitoxide-core/src/repository/attributes/query.rs @@ -32,7 +32,7 @@ pub(crate) mod function { // TODO(pathspec): The search is just used as a shortcut to normalization, but one day should be used for an actual search. let search = gix::pathspec::Search::from_specs( pathspecs, - repo.prefix().transpose()?.as_deref(), + repo.prefix()?.as_deref(), repo.work_dir().unwrap_or_else(|| repo.git_dir()), )?; diff --git a/gitoxide-core/src/repository/exclude.rs b/gitoxide-core/src/repository/exclude.rs index 9ceca4e9bc6..a91a695d1d3 100644 --- a/gitoxide-core/src/repository/exclude.rs +++ b/gitoxide-core/src/repository/exclude.rs @@ -44,7 +44,7 @@ pub fn query( // TODO(pathspec): actually use the search to find items. This looks like `gix` capabilities to put it all together. let search = gix::pathspec::Search::from_specs( pathspecs, - repo.prefix().transpose()?.as_deref(), + repo.prefix()?.as_deref(), repo.work_dir().unwrap_or_else(|| repo.git_dir()), )?; diff --git a/gitoxide-core/src/repository/index/entries.rs b/gitoxide-core/src/repository/index/entries.rs index c64559020e3..3a899b4760e 100644 --- a/gitoxide-core/src/repository/index/entries.rs +++ b/gitoxide-core/src/repository/index/entries.rs @@ -4,9 +4,10 @@ pub struct Options { /// If true, also show attributes pub attributes: Option, pub statistics: bool, + pub simple: bool, } -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub enum Attributes { /// Look at worktree attributes and index as fallback. 
WorktreeAndIndex, @@ -15,6 +16,7 @@ pub enum Attributes { } pub(crate) mod function { + use std::collections::BTreeSet; use std::{ borrow::Cow, io::{BufWriter, Write}, @@ -26,9 +28,11 @@ pub(crate) mod function { pub fn entries( repo: gix::Repository, + pathspecs: Vec, out: impl std::io::Write, mut err: impl std::io::Write, Options { + simple, format, attributes, statistics, @@ -37,6 +41,12 @@ pub(crate) mod function { use crate::OutputFormat::*; let index = repo.index_or_load_from_head()?; let mut cache = attributes + .or_else(|| { + pathspecs + .iter() + .any(|spec| !spec.attributes.is_empty()) + .then_some(Attributes::Index) + }) .map(|attrs| { repo.attributes( &index, @@ -70,52 +80,111 @@ pub(crate) mod function { ..Default::default() }; - let mut out = BufWriter::new(out); + let mut out = BufWriter::with_capacity(64 * 1024, out); #[cfg(feature = "serde")] if let Json = format { out.write_all(b"[\n")?; } - let mut entries = index.entries().iter().peekable(); - while let Some(entry) = entries.next() { - let attrs = cache - .as_mut() - .map(|(attrs, cache)| { - cache - .at_entry(entry.path(&index), None, |id, buf| repo.objects.find_blob(id, buf)) - .map(|entry| { - let is_excluded = entry.is_excluded(); - stats.excluded += usize::from(is_excluded); - let attributes: Vec<_> = { - entry.matching_attributes(attrs); - attrs.iter().map(|m| m.assignment.to_owned()).collect() - }; - stats.with_attributes += usize::from(!attributes.is_empty()); - Attrs { - is_excluded, - attributes, - } + let mut search = gix::pathspec::Search::from_specs( + pathspecs, + repo.prefix()?.as_deref(), + gix::path::realpath(repo.work_dir().unwrap_or_else(|| repo.git_dir()))?.as_ref(), // TODO(pathspec): this setup needs `gix`. 
+ )?; + let mut all_attrs = statistics.then(BTreeSet::new); + if let Some(entries) = index.prefixed_entries(search.common_prefix()) { + stats.entries_after_prune = entries.len(); + let mut entries = entries.iter().peekable(); + while let Some(entry) = entries.next() { + let mut last_match = None; + let attrs = cache + .as_mut() + .and_then(|(attrs, cache)| { + // If the user wants to see assigned attributes, we always have to match. + attributes.is_some().then(|| { + cache + .at_entry(entry.path(&index), None, |id, buf| repo.objects.find_blob(id, buf)) + .map(|entry| { + let is_excluded = entry.is_excluded(); + stats.excluded += usize::from(is_excluded); + let attributes: Vec<_> = { + last_match = Some(entry.matching_attributes(attrs)); + attrs.iter().map(|m| m.assignment.to_owned()).collect() + }; + stats.with_attributes += usize::from(!attributes.is_empty()); + stats.max_attributes_per_path = stats.max_attributes_per_path.max(attributes.len()); + if let Some(attrs) = all_attrs.as_mut() { + attributes.iter().for_each(|attr| { + attrs.insert(attr.clone()); + }); + } + Attrs { + is_excluded, + attributes, + } + }) }) - }) - .transpose()?; - match format { - Human => to_human(&mut out, &index, entry, attrs)?, - #[cfg(feature = "serde")] - Json => to_json(&mut out, &index, entry, attrs, entries.peek().is_none())?, + }) + .transpose()?; + + // Note that we intentionally ignore `_case` so that we act like git does, attribute matching case is determined + // by the repository, not the pathspec. + if search + .pattern_matching_relative_path(entry.path(&index), Some(false), |rela_path, _case, is_dir, out| { + cache + .as_mut() + .map(|(attrs, cache)| { + match last_match { + // The user wants the attributes for display, so the match happened already. 
+ Some(matched) => { + attrs.copy_into(cache.attributes_collection(), out); + matched + } + // The user doesn't want attributes, so we set the cache position on demand only + None => cache + .at_entry(rela_path, Some(is_dir), |id, buf| repo.objects.find_blob(id, buf)) + .ok() + .map(|platform| platform.matching_attributes(out)) + .unwrap_or_default(), + } + }) + .unwrap_or_default() + }) + .map_or(true, |m| m.is_excluded()) + { + continue; + } + match format { + Human => { + if simple { + to_human_simple(&mut out, &index, entry, attrs) + } else { + to_human(&mut out, &index, entry, attrs) + }? + } + #[cfg(feature = "serde")] + Json => to_json(&mut out, &index, entry, attrs, entries.peek().is_none())?, + } } - } - #[cfg(feature = "serde")] - if format == Json { - out.write_all(b"]\n")?; - out.flush()?; - if statistics { - serde_json::to_writer_pretty(&mut err, &stats)?; + #[cfg(feature = "serde")] + if format == Json { + out.write_all(b"]\n")?; + out.flush()?; + if statistics { + serde_json::to_writer_pretty(&mut err, &stats)?; + } + } + if format == Human && statistics { + out.flush()?; + stats.cache = cache.map(|c| *c.1.statistics()); + writeln!(err, "{stats:#?}")?; + if let Some(attrs) = all_attrs.filter(|a| !a.is_empty()) { + writeln!(err, "All encountered attributes:")?; + for attr in attrs { + writeln!(err, "\t{attr}", attr = attr.as_ref())?; + } + } } - } - if format == Human && statistics { - out.flush()?; - stats.cache = cache.map(|c| *c.1.statistics()); - writeln!(err, "{stats:#?}")?; } Ok(()) } @@ -131,8 +200,10 @@ pub(crate) mod function { struct Statistics { #[allow(dead_code)] // Not really dead, but Debug doesn't count for it even though it's crucial. 
pub entries: usize, + pub entries_after_prune: usize, pub excluded: usize, pub with_attributes: usize, + pub max_attributes_per_path: usize, pub cache: Option, } @@ -175,6 +246,22 @@ pub(crate) mod function { Ok(()) } + fn to_human_simple( + out: &mut impl std::io::Write, + file: &gix::index::File, + entry: &gix::index::Entry, + attrs: Option, + ) -> std::io::Result<()> { + match attrs { + Some(attrs) => { + out.write_all(entry.path(file))?; + out.write_all(print_attrs(Some(attrs)).as_bytes()) + } + None => out.write_all(entry.path(file)), + }?; + out.write_all(b"\n") + } + fn to_human( out: &mut impl std::io::Write, file: &gix::index::File, @@ -198,24 +285,28 @@ pub(crate) mod function { entry.mode, entry.id, entry.path(file), - attrs.map_or(Cow::Borrowed(""), |a| { - let mut buf = String::new(); - if a.is_excluded { - buf.push_str(" ❌"); - } - if !a.attributes.is_empty() { - buf.push_str(" ("); - for assignment in a.attributes { - use std::fmt::Write; - write!(&mut buf, "{}", assignment.as_ref()).ok(); - buf.push_str(", "); - } - buf.pop(); - buf.pop(); - buf.push(')'); - } - buf.into() - }) + print_attrs(attrs) ) } + + fn print_attrs(attrs: Option) -> Cow<'static, str> { + attrs.map_or(Cow::Borrowed(""), |a| { + let mut buf = String::new(); + if a.is_excluded { + buf.push_str(" ❌"); + } + if !a.attributes.is_empty() { + buf.push_str(" ("); + for assignment in a.attributes { + use std::fmt::Write; + write!(&mut buf, "{}", assignment.as_ref()).ok(); + buf.push_str(", "); + } + buf.pop(); + buf.pop(); + buf.push(')'); + } + buf.into() + }) + } } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 20f907a682f..8fe4b0abcd9 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1018,7 +1018,7 @@ pub fn main() -> Result<()> { ), }, Subcommands::Attributes(cmd) => match cmd { - attributes::Subcommands::Query { statistics, pathspecs } => prepare_and_run( + attributes::Subcommands::Query { statistics, pathspec } => prepare_and_run( "attributes-query", 
trace, verbose, @@ -1029,7 +1029,7 @@ pub fn main() -> Result<()> { use gix::bstr::ByteSlice; core::repository::attributes::query( repository(Mode::Strict)?, - if pathspecs.is_empty() { + if pathspec.is_empty() { Box::new(stdin_or_bail()?.byte_lines().filter_map(Result::ok).filter_map(|line| { gix::pathspec::parse( line.as_bstr(), @@ -1039,7 +1039,7 @@ pub fn main() -> Result<()> { .ok() })) as Box> } else { - Box::new(pathspecs.into_iter()) + Box::new(pathspec.into_iter()) }, out, err, @@ -1076,7 +1076,7 @@ pub fn main() -> Result<()> { exclude::Subcommands::Query { statistics, patterns, - pathspecs, + pathspec, show_ignore_patterns, } => prepare_and_run( "exclude-query", @@ -1088,7 +1088,7 @@ pub fn main() -> Result<()> { move |_progress, out, err| { core::repository::exclude::query( repository(Mode::Strict)?, - if pathspecs.is_empty() { + if pathspec.is_empty() { Box::new( stdin_or_bail()? .byte_lines() @@ -1096,7 +1096,7 @@ pub fn main() -> Result<()> { .filter_map(|line| gix::pathspec::parse(&line, Default::default()).ok()), ) as Box> } else { - Box::new(pathspecs.into_iter()) + Box::new(pathspec.into_iter()) }, out, err, @@ -1112,9 +1112,11 @@ pub fn main() -> Result<()> { }, Subcommands::Index(cmd) => match cmd { index::Subcommands::Entries { + format: entry_format, no_attributes, attributes_from_index, statistics, + pathspec, } => prepare_and_run( "index-entries", trace, @@ -1125,10 +1127,15 @@ pub fn main() -> Result<()> { move |_progress, out, err| { core::repository::index::entries( repository(Mode::LenientWithGitInstallConfig)?, + pathspec, out, err, core::repository::index::entries::Options { format, + simple: match entry_format { + index::entries::Format::Simple => true, + index::entries::Format::Rich => false, + }, attributes: if no_attributes { None } else { diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 64f591f4cb0..19b0004d501 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -610,7 +610,7 
@@ pub mod attributes { statistics: bool, /// The git path specifications to list attributes for, or unset to read from stdin one per line. #[clap(value_parser = AsPathSpec)] - pathspecs: Vec, + pathspec: Vec, }, } } @@ -639,18 +639,33 @@ pub mod exclude { patterns: Vec, /// The git path specifications to check for exclusion, or unset to read from stdin one per line. #[clap(value_parser = AsPathSpec)] - pathspecs: Vec, + pathspec: Vec, }, } } pub mod index { + use crate::shared::AsPathSpec; use std::path::PathBuf; + pub mod entries { + #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)] + pub enum Format { + /// Show only the path of each entry (followed by its attributes if these are enabled), similar to `git ls-files`. + #[default] + Simple, + /// Show more information per entry, such as its mode and object id, in addition to the path. + Rich, + } + } + #[derive(Debug, clap::Subcommand)] pub enum Subcommands { /// Print all entries to standard output Entries { + /// How to output index entries. + #[clap(long, short = 'f', default_value = "simple", value_enum)] + format: entries::Format, /// Do not visualize excluded entries or attributes per path. #[clap(long)] no_attributes: bool, @@ -662,6 +677,9 @@ pub mod index { /// Print various statistics to stderr #[clap(long, short = 's')] statistics: bool, + /// The git path specifications to match entries to print. + #[clap(value_parser = AsPathSpec)] + pathspec: Vec, }, /// Create an index from a tree-ish. #[clap(visible_alias = "read-tree")]