diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs index bea6f98f560..8eac3f4d8d2 100644 --- a/gix-diff/src/blob/unified_diff.rs +++ b/gix-diff/src/blob/unified_diff.rs @@ -26,6 +26,19 @@ impl ContextSize { } } +/// Specify where to put a newline. +#[derive(Debug, Copy, Clone)] +pub enum NewlineSeparator<'a> { + /// Place the given newline separator, like `\n`, after each patch header as well as after each line. + /// This is the right choice if tokens don't include newlines. + AfterHeaderAndLine(&'a str), + /// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline. + /// This is the right choice if tokens do include newlines. + /// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended, + /// as it will make the last line look like it changed just because the whitespace at the end 'changed'. + AfterHeaderAndWhenNeeded(&'a str), +} + /// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). pub trait ConsumeHunk { /// The item this instance produces after consuming all hunks. @@ -56,7 +69,7 @@ pub trait ConsumeHunk { } pub(super) mod _impl { - use super::{ConsumeHunk, ContextSize}; + use super::{ConsumeHunk, ContextSize, NewlineSeparator}; use bstr::{ByteSlice, ByteVec}; use imara_diff::{intern, Sink}; use intern::{InternedInput, Interner, Token}; @@ -86,7 +99,7 @@ pub(super) mod _impl { buffer: Vec, header_buf: String, delegate: D, - newline: &'a str, + newline: NewlineSeparator<'a>, err: Option, } @@ -101,11 +114,11 @@ pub(super) mod _impl { /// `context_size` is the amount of lines around each hunk which will be passed ///to `consume_hunk`. /// - /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`, + /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`. pub fn new( input: &'a InternedInput, consume_hunk: D, - newline_separator: &'a str, + newline_separator: NewlineSeparator<'a>, context_size: ContextSize, ) -> Self { Self { @@ -130,8 +143,18 @@ pub(super) mod _impl { fn print_tokens(&mut self, tokens: &[Token], prefix: char) { for &token in tokens { self.buffer.push_char(prefix); - self.buffer.push_str(&self.interner[token]); - self.buffer.push_str(self.newline.as_bytes()); + let line = &self.interner[token]; + self.buffer.push_str(line); + match self.newline { + NewlineSeparator::AfterHeaderAndLine(nl) => { + self.buffer.push_str(nl); + } + NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + if !line.as_ref().ends_with_str(nl) { + self.buffer.push_str(nl); + } + } + } } } @@ -153,7 +176,11 @@ pub(super) mod _impl { self.before_hunk_len, self.after_hunk_start + 1, self.after_hunk_len, - nl = self.newline + nl = match self.newline { + NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + nl + } + } ), ) .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?; diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs index aa43f5cd2cc..4bc8a385169 100644 --- a/gix-diff/tests/diff/blob/unified_diff.rs +++ b/gix-diff/tests/diff/blob/unified_diff.rs @@ -1,4 +1,4 @@ -use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize}; +use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator}; use gix_diff::blob::{Algorithm, UnifiedDiff}; #[test] @@ -10,7 +10,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(3), + ), )?; // merged by context. @@ -34,7 +39,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(1)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(1), + ), )?; // Small context lines keeps hunks separate. insta::assert_snapshot!(actual, @r" @@ -55,7 +65,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(0)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(0), + ), )?; // No context is also fine insta::assert_snapshot!(actual, @r" @@ -69,41 +84,15 @@ fn removed_modified_added() -> crate::Result { +twelve "); - #[derive(Default)] - struct Recorder { - #[allow(clippy::type_complexity)] - hunks: Vec<((u32, u32), (u32, u32), String)>, - } - - impl ConsumeHunk for Recorder { - type Out = Vec<((u32, u32), (u32, u32), String)>; - - fn consume_hunk( - &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: u32, - header: &str, - _hunk: &[u8], - ) -> std::io::Result<()> { - self.hunks.push(( - (before_hunk_start, before_hunk_len), - (after_hunk_start, after_hunk_len), - header.to_string(), - )); - Ok(()) - } - - fn finish(self) -> Self::Out { - self.hunks - } - } - let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, Recorder::default(), "\n", ContextSize::symmetrical(1)), + UnifiedDiff::new( + &interner, + Recorder::default(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(1), + ), )?; assert_eq!( actual, @@ -117,6 +106,119 @@ fn removed_modified_added() -> crate::Result { Ok(()) } +#[test] +fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { + let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10"; + let b = "2\n3\n4\n5\nsix\n7\n8\n9\n10\neleven\ntwelve"; + + let a = gix_diff::blob::sources::lines_with_terminator(a); + let b = gix_diff::blob::sources::lines_with_terminator(b); + let interner = gix_diff::blob::intern::InternedInput::new(a, b); + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(3), + ), + )?; + + // merged by context. + // newline diffs differently. + insta::assert_snapshot!(actual, @r" + @@ -1,10 +1,11 @@ + -1 + 2 + 3 + 4 + 5 + -6 + +six + 7 + 8 + 9 + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(1), + ), + )?; + // Small context lines keeps hunks separate. + insta::assert_snapshot!(actual, @r" + @@ -1,2 +1,1 @@ + -1 + 2 + @@ -5,3 +4,3 @@ + 5 + -6 + +six + 7 + @@ -9,2 +8,4 @@ + 9 + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(0), + ), + )?; + // No context is also fine + insta::assert_snapshot!(actual, @r" + @@ -1,1 +1,0 @@ + -1 + @@ -6,1 +5,1 @@ + -6 + +six + @@ -10,1 +9,3 @@ + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + Recorder::default(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), + ContextSize::symmetrical(1), + ), + )?; + assert_eq!( + actual, + &[ + ((1, 2), (1, 1), "@@ -1,2 +1,1 @@\r\n".to_string()), + ((5, 3), (4, 3), "@@ -5,3 +4,3 @@\r\n".into()), + ((9, 2), (8, 4), "@@ -9,2 +8,4 @@\r\n".into()) + ] + ); + + Ok(()) +} + #[test] fn all_added_or_removed() -> crate::Result { let content = "1\n2\n3\n4\n5"; @@ -127,7 +229,12 @@ fn all_added_or_removed() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(context_lines), + ), )?; assert_eq!( actual, @@ -147,7 +254,12 @@ fn all_added_or_removed() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(context_lines), + ), )?; assert_eq!( actual, @@ -170,9 +282,45 @@ fn empty() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(3), + ), )?; insta::assert_snapshot!(actual, @r""); Ok(()) } + +#[derive(Default)] +struct Recorder { + #[allow(clippy::type_complexity)] + hunks: Vec<((u32, u32), (u32, u32), String)>, +} + +impl ConsumeHunk for Recorder { + type Out = Vec<((u32, u32), (u32, u32), String)>; + + fn consume_hunk( + &mut self, + before_hunk_start: u32, + before_hunk_len: u32, + after_hunk_start: u32, + after_hunk_len: u32, + header: &str, + _hunk: &[u8], + ) -> std::io::Result<()> { + self.hunks.push(( + (before_hunk_start, before_hunk_len), + (after_hunk_start, after_hunk_len), + header.to_string(), + )); + Ok(()) + } + + fn finish(self) -> Self::Out { + self.hunks + } +} diff --git a/gix/src/revision/walk.rs b/gix/src/revision/walk.rs index bdc233d9e41..9d62e474892 100644 --- a/gix/src/revision/walk.rs +++ b/gix/src/revision/walk.rs @@ -155,7 +155,7 @@ pub struct Platform<'repo> { /// The owning repository. pub repo: &'repo Repository, pub(crate) tips: Vec, - pub(crate) prune: Vec, + pub(crate) boundary: Vec, pub(crate) sorting: Sorting, pub(crate) parents: gix_traverse::commit::Parents, pub(crate) use_commit_graph: Option, @@ -171,7 +171,7 @@ impl<'repo> Platform<'repo> { parents: Default::default(), use_commit_graph: None, commit_graph: None, - prune: Vec::new(), + boundary: Vec::new(), } } } @@ -210,15 +210,16 @@ impl Platform<'_> { self } - /// Prune the commit with the given `ids` such that they won't be returned, and such that none of their ancestors is returned either. - /// - /// Note that this forces the [sorting](Self::sorting) to - /// [`ByCommitTimeCutoff`](Sorting::ByCommitTimeCutoff) configured with - /// the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either. + /// Don't cross the given `ids` during traversal. /// + /// Note that this forces the [sorting](Self::sorting()) to [`ByCommitTimeCutoff`](Sorting::ByCommitTimeCutoff) + /// configured with the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either. /// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date. - #[doc(alias = "hide", alias = "git2")] - pub fn with_pruned(mut self, ids: impl IntoIterator>) -> Self { + /// + /// A boundary is distinctly different from exclusive refsepcs `^branch-to-not-list` in Git log. + /// + /// If this is not desired, [set the sorting](Self::sorting()) to something else right after this call. + pub fn with_boundary(mut self, ids: impl IntoIterator>) -> Self { let (mut cutoff, order) = match self.sorting { Sorting::ByCommitTimeCutoff { seconds, order } => (Some(seconds), order), Sorting::ByCommitTime(order) => (None, order), @@ -226,13 +227,13 @@ impl Platform<'_> { }; for id in ids.into_iter() { let id = id.into(); - if !self.prune.contains(&id) { + if !self.boundary.contains(&id) { if let Some(time) = self.repo.find_commit(id).ok().and_then(|c| c.time().ok()) { if cutoff.is_none() || cutoff > Some(time.seconds) { cutoff = time.seconds.into(); } } - self.prune.push(id); + self.boundary.push(id); } } @@ -260,9 +261,9 @@ impl<'repo> Platform<'repo> { parents, use_commit_graph, commit_graph, - mut prune, + mut boundary, } = self; - prune.sort(); + boundary.sort(); Ok(revision::Walk { repo, inner: Box::new( @@ -277,7 +278,7 @@ impl<'repo> Platform<'repo> { return false; } let id = id.to_owned(); - if prune.binary_search(&id).is_ok() { + if boundary.binary_search(&id).is_ok() { return false; } match shallow_commits.as_ref() { diff --git a/gix/tests/gix/id.rs b/gix/tests/gix/id.rs index 2d2f24948fc..397d41c9cc1 100644 --- a/gix/tests/gix/id.rs +++ b/gix/tests/gix/id.rs @@ -140,7 +140,7 @@ mod ancestors { for use_commit_graph in [false, true] { let commits_graph_order = head .ancestors() - .with_pruned(Some(hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"))) + .with_boundary(Some(hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"))) .use_commit_graph(use_commit_graph) .all()? .map(|c| c.map(|c| c.id))