From 1091e29c06a8682b761fe54bf18ae7e40121aa51 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 30 Jan 2025 20:27:30 +0100 Subject: [PATCH] fix!: `blob::UnifiedDiff::new(...newline...)` is now an Enum. That way one can specify where newlines should be added. --- gix-diff/src/blob/unified_diff.rs | 41 +++- gix-diff/tests/diff/blob/unified_diff.rs | 226 +++++++++++++++++++---- 2 files changed, 221 insertions(+), 46 deletions(-) diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs index bea6f98f560..8eac3f4d8d2 100644 --- a/gix-diff/src/blob/unified_diff.rs +++ b/gix-diff/src/blob/unified_diff.rs @@ -26,6 +26,19 @@ impl ContextSize { } } +/// Specify where to put a newline. +#[derive(Debug, Copy, Clone)] +pub enum NewlineSeparator<'a> { + /// Place the given newline separator, like `\n`, after each patch header as well as after each line. + /// This is the right choice if tokens don't include newlines. + AfterHeaderAndLine(&'a str), + /// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline. + /// This is the right choice if tokens do include newlines. + /// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended, + /// as it will make the last line look like it changed just because the whitespace at the end 'changed'. + AfterHeaderAndWhenNeeded(&'a str), +} + /// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff). pub trait ConsumeHunk { /// The item this instance produces after consuming all hunks. @@ -56,7 +69,7 @@ pub trait ConsumeHunk { } pub(super) mod _impl { - use super::{ConsumeHunk, ContextSize}; + use super::{ConsumeHunk, ContextSize, NewlineSeparator}; use bstr::{ByteSlice, ByteVec}; use imara_diff::{intern, Sink}; use intern::{InternedInput, Interner, Token}; @@ -86,7 +99,7 @@ pub(super) mod _impl { buffer: Vec, header_buf: String, delegate: D, - newline: &'a str, + newline: NewlineSeparator<'a>, err: Option, } @@ -101,11 +114,11 @@ pub(super) mod _impl { /// `context_size` is the amount of lines around each hunk which will be passed ///to `consume_hunk`. /// - /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`, + /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`. pub fn new( input: &'a InternedInput, consume_hunk: D, - newline_separator: &'a str, + newline_separator: NewlineSeparator<'a>, context_size: ContextSize, ) -> Self { Self { @@ -130,8 +143,18 @@ pub(super) mod _impl { fn print_tokens(&mut self, tokens: &[Token], prefix: char) { for &token in tokens { self.buffer.push_char(prefix); - self.buffer.push_str(&self.interner[token]); - self.buffer.push_str(self.newline.as_bytes()); + let line = &self.interner[token]; + self.buffer.push_str(line); + match self.newline { + NewlineSeparator::AfterHeaderAndLine(nl) => { + self.buffer.push_str(nl); + } + NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + if !line.as_ref().ends_with_str(nl) { + self.buffer.push_str(nl); + } + } + } } } @@ -153,7 +176,11 @@ pub(super) mod _impl { self.before_hunk_len, self.after_hunk_start + 1, self.after_hunk_len, - nl = self.newline + nl = match self.newline { + NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => { + nl + } + } ), ) .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?; diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs index aa43f5cd2cc..4bc8a385169 100644 --- a/gix-diff/tests/diff/blob/unified_diff.rs +++ b/gix-diff/tests/diff/blob/unified_diff.rs @@ -1,4 +1,4 @@ -use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize}; +use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator}; use gix_diff::blob::{Algorithm, UnifiedDiff}; #[test] @@ -10,7 +10,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(3), + ), )?; // merged by context. @@ -34,7 +39,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(1)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(1), + ), )?; // Small context lines keeps hunks separate. insta::assert_snapshot!(actual, @r" @@ -55,7 +65,12 @@ fn removed_modified_added() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(0)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(0), + ), )?; // No context is also fine insta::assert_snapshot!(actual, @r" @@ -69,41 +84,15 @@ fn removed_modified_added() -> crate::Result { +twelve "); - #[derive(Default)] - struct Recorder { - #[allow(clippy::type_complexity)] - hunks: Vec<((u32, u32), (u32, u32), String)>, - } - - impl ConsumeHunk for Recorder { - type Out = Vec<((u32, u32), (u32, u32), String)>; - - fn consume_hunk( - &mut self, - before_hunk_start: u32, - before_hunk_len: u32, - after_hunk_start: u32, - after_hunk_len: u32, - header: &str, - _hunk: &[u8], - ) -> std::io::Result<()> { - self.hunks.push(( - (before_hunk_start, before_hunk_len), - (after_hunk_start, after_hunk_len), - header.to_string(), - )); - Ok(()) - } - - fn finish(self) -> Self::Out { - self.hunks - } - } - let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, Recorder::default(), "\n", ContextSize::symmetrical(1)), + UnifiedDiff::new( + &interner, + Recorder::default(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(1), + ), )?; assert_eq!( actual, @@ -117,6 +106,119 @@ fn removed_modified_added() -> crate::Result { Ok(()) } +#[test] +fn removed_modified_added_with_newlines_in_tokens() -> crate::Result { + let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10"; + let b = "2\n3\n4\n5\nsix\n7\n8\n9\n10\neleven\ntwelve"; + + let a = gix_diff::blob::sources::lines_with_terminator(a); + let b = gix_diff::blob::sources::lines_with_terminator(b); + let interner = gix_diff::blob::intern::InternedInput::new(a, b); + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(3), + ), + )?; + + // merged by context. + // newline diffs differently. + insta::assert_snapshot!(actual, @r" + @@ -1,10 +1,11 @@ + -1 + 2 + 3 + 4 + 5 + -6 + +six + 7 + 8 + 9 + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(1), + ), + )?; + // Small context lines keeps hunks separate. + insta::assert_snapshot!(actual, @r" + @@ -1,2 +1,1 @@ + -1 + 2 + @@ -5,3 +4,3 @@ + 5 + -6 + +six + 7 + @@ -9,2 +8,4 @@ + 9 + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\n"), + ContextSize::symmetrical(0), + ), + )?; + // No context is also fine + insta::assert_snapshot!(actual, @r" + @@ -1,1 +1,0 @@ + -1 + @@ -6,1 +5,1 @@ + -6 + +six + @@ -10,1 +9,3 @@ + -10 + +10 + +eleven + +twelve + "); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new( + &interner, + Recorder::default(), + NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"), + ContextSize::symmetrical(1), + ), + )?; + assert_eq!( + actual, + &[ + ((1, 2), (1, 1), "@@ -1,2 +1,1 @@\r\n".to_string()), + ((5, 3), (4, 3), "@@ -5,3 +4,3 @@\r\n".into()), + ((9, 2), (8, 4), "@@ -9,2 +8,4 @@\r\n".into()) + ] + ); + + Ok(()) +} + #[test] fn all_added_or_removed() -> crate::Result { let content = "1\n2\n3\n4\n5"; @@ -127,7 +229,12 @@ fn all_added_or_removed() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(context_lines), + ), )?; assert_eq!( actual, @@ -147,7 +254,12 @@ fn all_added_or_removed() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(context_lines), + ), )?; assert_eq!( actual, @@ -170,9 +282,45 @@ fn empty() -> crate::Result { let actual = gix_diff::blob::diff( Algorithm::Myers, &interner, - UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)), + UnifiedDiff::new( + &interner, + String::new(), + NewlineSeparator::AfterHeaderAndLine("\n"), + ContextSize::symmetrical(3), + ), )?; insta::assert_snapshot!(actual, @r""); Ok(()) } + +#[derive(Default)] +struct Recorder { + #[allow(clippy::type_complexity)] + hunks: Vec<((u32, u32), (u32, u32), String)>, +} + +impl ConsumeHunk for Recorder { + type Out = Vec<((u32, u32), (u32, u32), String)>; + + fn consume_hunk( + &mut self, + before_hunk_start: u32, + before_hunk_len: u32, + after_hunk_start: u32, + after_hunk_len: u32, + header: &str, + _hunk: &[u8], + ) -> std::io::Result<()> { + self.hunks.push(( + (before_hunk_start, before_hunk_len), + (after_hunk_start, after_hunk_len), + header.to_string(), + )); + Ok(()) + } + + fn finish(self) -> Self::Out { + self.hunks + } +}