diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs
index 495d23bd433..b64092c92e8 100644
--- a/gix-diff/src/blob/platform.rs
+++ b/gix-diff/src/blob/platform.rs
@@ -110,11 +110,25 @@ pub mod resource {
             }
         }
 
-        /// Produce an iterator over lines, separated by LF or CRLF, suitable to create tokens using
-        /// [`imara_diff::intern::InternedInput`].
+        /// Produce an iterator over lines, separated by LF or CRLF and thus keeping newlines.
+        ///
+        /// Note that this will cause unusual diffs if a file didn't end in newline but lines were added
+        /// on the other side.
+        ///
+        /// Suitable to create tokens using [`imara_diff::intern::InternedInput`].
         pub fn intern_source(&self) -> imara_diff::sources::ByteLines<'a, true> {
             crate::blob::sources::byte_lines_with_terminator(self.data.as_slice().unwrap_or_default())
         }
+
+        /// Produce an iterator over lines, but remove LF or CRLF.
+        ///
+        /// This produces the expected diffs when lines were added at the end of a file that didn't end
+        /// with a newline before the change.
+        ///
+        /// Suitable to create tokens using [`imara_diff::intern::InternedInput`].
+        pub fn intern_source_strip_newline_separators(&self) -> imara_diff::sources::ByteLines<'a, false> {
+            crate::blob::sources::byte_lines(self.data.as_slice().unwrap_or_default())
+        }
     }
 
     /// The data of a diffable resource, as it could be determined and computed previously.
@@ -228,8 +242,15 @@ pub mod prepare_diff {
 
     impl<'a> Outcome<'a> {
         /// Produce an instance of an interner which `git` would use to perform diffs.
+        ///
+        /// Note that newline separators will be removed to improve diff quality
+        /// at the end of files that didn't have a newline, but had lines added
+        /// past the end.
         pub fn interned_input(&self) -> imara_diff::intern::InternedInput<&'a [u8]> {
-            crate::blob::intern::InternedInput::new(self.old.intern_source(), self.new.intern_source())
+            crate::blob::intern::InternedInput::new(
+                self.old.intern_source_strip_newline_separators(),
+                self.new.intern_source_strip_newline_separators(),
+            )
         }
     }
 }
diff --git a/gix/tests/gix/object/tree/diff.rs b/gix/tests/gix/object/tree/diff.rs
index 1109188d26f..36a25e86c17 100644
--- a/gix/tests/gix/object/tree/diff.rs
+++ b/gix/tests/gix/object/tree/diff.rs
@@ -59,10 +59,17 @@ fn changes_against_tree_modified() -> crate::Result {
                 diff.lines(|hunk| {
                     match hunk {
                         lines::Change::Deletion { .. } => unreachable!("there was no deletion"),
-                        lines::Change::Addition { lines } => assert_eq!(
-                            lines,
-                            vec![expected_data[expected_previous_data.len()..].as_bytes().as_bstr()]
-                        ),
+                        lines::Change::Addition { lines } => {
+                            assert_eq!(lines.len(), 1);
+                            assert_eq!(
+                                lines[0],
+                                expected_data[expected_previous_data.len()..]
+                                    .as_bytes()
+                                    .as_bstr()
+                                    .trim(),
+                                "diffed lines don't have newlines anymore"
+                            );
+                        }
                         lines::Change::Modification { .. } => unreachable!("there was no modification"),
                     };
                     Ok::<_, Infallible>(())