Skip to content

Commit

Permalink
Merge pull request #1820 from GitoxideLabs/improvements
Browse files Browse the repository at this point in the history
various improvements
  • Loading branch information
Byron authored Jan 30, 2025
2 parents 14d6b8d + 1091e29 commit daa6d4a
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 61 deletions.
41 changes: 34 additions & 7 deletions gix-diff/src/blob/unified_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ impl ContextSize {
}
}

/// Specify where to put a newline.
#[derive(Debug, Copy, Clone)]
pub enum NewlineSeparator<'a> {
/// Place the given newline separator, like `\n`, after each patch header as well as after each line.
/// This is the right choice if tokens don't include newlines.
AfterHeaderAndLine(&'a str),
/// Place the given newline separator, like `\n`, only after each patch header or if a line doesn't contain a newline.
/// This is the right choice if tokens do include newlines.
/// Note that diff-tokens *with* newlines may diff strangely at the end of files when lines have been appended,
/// as it will make the last line look like it changed just because the whitespace at the end 'changed'.
AfterHeaderAndWhenNeeded(&'a str),
}

/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
pub trait ConsumeHunk {
/// The item this instance produces after consuming all hunks.
Expand Down Expand Up @@ -56,7 +69,7 @@ pub trait ConsumeHunk {
}

pub(super) mod _impl {
use super::{ConsumeHunk, ContextSize};
use super::{ConsumeHunk, ContextSize, NewlineSeparator};
use bstr::{ByteSlice, ByteVec};
use imara_diff::{intern, Sink};
use intern::{InternedInput, Interner, Token};
Expand Down Expand Up @@ -86,7 +99,7 @@ pub(super) mod _impl {
buffer: Vec<u8>,
header_buf: String,
delegate: D,
newline: &'a str,
newline: NewlineSeparator<'a>,

err: Option<std::io::Error>,
}
Expand All @@ -101,11 +114,11 @@ pub(super) mod _impl {
/// `context_size` is the amount of lines around each hunk which will be passed
///to `consume_hunk`.
///
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
/// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
pub fn new(
input: &'a InternedInput<T>,
consume_hunk: D,
newline_separator: &'a str,
newline_separator: NewlineSeparator<'a>,
context_size: ContextSize,
) -> Self {
Self {
Expand All @@ -130,8 +143,18 @@ pub(super) mod _impl {
fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
for &token in tokens {
self.buffer.push_char(prefix);
self.buffer.push_str(&self.interner[token]);
self.buffer.push_str(self.newline.as_bytes());
let line = &self.interner[token];
self.buffer.push_str(line);
match self.newline {
NewlineSeparator::AfterHeaderAndLine(nl) => {
self.buffer.push_str(nl);
}
NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
if !line.as_ref().ends_with_str(nl) {
self.buffer.push_str(nl);
}
}
}
}
}

Expand All @@ -153,7 +176,11 @@ pub(super) mod _impl {
self.before_hunk_len,
self.after_hunk_start + 1,
self.after_hunk_len,
nl = self.newline
nl = match self.newline {
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
nl
}
}
),
)
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
Expand Down
226 changes: 187 additions & 39 deletions gix-diff/tests/diff/blob/unified_diff.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize};
use gix_diff::blob::unified_diff::{ConsumeHunk, ContextSize, NewlineSeparator};
use gix_diff::blob::{Algorithm, UnifiedDiff};

#[test]
Expand All @@ -10,7 +10,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(3),
),
)?;

// merged by context.
Expand All @@ -34,7 +39,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(1)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(1),
),
)?;
// Small context lines keeps hunks separate.
insta::assert_snapshot!(actual, @r"
Expand All @@ -55,7 +65,12 @@ fn removed_modified_added() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(0)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(0),
),
)?;
// No context is also fine
insta::assert_snapshot!(actual, @r"
Expand All @@ -69,41 +84,15 @@ fn removed_modified_added() -> crate::Result {
+twelve
");

#[derive(Default)]
struct Recorder {
#[allow(clippy::type_complexity)]
hunks: Vec<((u32, u32), (u32, u32), String)>,
}

impl ConsumeHunk for Recorder {
type Out = Vec<((u32, u32), (u32, u32), String)>;

fn consume_hunk(
&mut self,
before_hunk_start: u32,
before_hunk_len: u32,
after_hunk_start: u32,
after_hunk_len: u32,
header: &str,
_hunk: &[u8],
) -> std::io::Result<()> {
self.hunks.push((
(before_hunk_start, before_hunk_len),
(after_hunk_start, after_hunk_len),
header.to_string(),
));
Ok(())
}

fn finish(self) -> Self::Out {
self.hunks
}
}

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, Recorder::default(), "\n", ContextSize::symmetrical(1)),
UnifiedDiff::new(
&interner,
Recorder::default(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(1),
),
)?;
assert_eq!(
actual,
Expand All @@ -117,6 +106,119 @@ fn removed_modified_added() -> crate::Result {
Ok(())
}

#[test]
fn removed_modified_added_with_newlines_in_tokens() -> crate::Result {
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
let b = "2\n3\n4\n5\nsix\n7\n8\n9\n10\neleven\ntwelve";

let a = gix_diff::blob::sources::lines_with_terminator(a);
let b = gix_diff::blob::sources::lines_with_terminator(b);
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(3),
),
)?;

// merged by context.
// newline diffs differently.
insta::assert_snapshot!(actual, @r"
@@ -1,10 +1,11 @@
-1
2
3
4
5
-6
+six
7
8
9
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(1),
),
)?;
// Small context lines keeps hunks separate.
insta::assert_snapshot!(actual, @r"
@@ -1,2 +1,1 @@
-1
2
@@ -5,3 +4,3 @@
5
-6
+six
7
@@ -9,2 +8,4 @@
9
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
ContextSize::symmetrical(0),
),
)?;
// No context is also fine
insta::assert_snapshot!(actual, @r"
@@ -1,1 +1,0 @@
-1
@@ -6,1 +5,1 @@
-6
+six
@@ -10,1 +9,3 @@
-10
+10
+eleven
+twelve
");

let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(
&interner,
Recorder::default(),
NewlineSeparator::AfterHeaderAndWhenNeeded("\r\n"),
ContextSize::symmetrical(1),
),
)?;
assert_eq!(
actual,
&[
((1, 2), (1, 1), "@@ -1,2 +1,1 @@\r\n".to_string()),
((5, 3), (4, 3), "@@ -5,3 +4,3 @@\r\n".into()),
((9, 2), (8, 4), "@@ -9,2 +8,4 @@\r\n".into())
]
);

Ok(())
}

#[test]
fn all_added_or_removed() -> crate::Result {
let content = "1\n2\n3\n4\n5";
Expand All @@ -127,7 +229,12 @@ fn all_added_or_removed() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(context_lines),
),
)?;
assert_eq!(
actual,
Expand All @@ -147,7 +254,12 @@ fn all_added_or_removed() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(context_lines)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(context_lines),
),
)?;
assert_eq!(
actual,
Expand All @@ -170,9 +282,45 @@ fn empty() -> crate::Result {
let actual = gix_diff::blob::diff(
Algorithm::Myers,
&interner,
UnifiedDiff::new(&interner, String::new(), "\n", ContextSize::symmetrical(3)),
UnifiedDiff::new(
&interner,
String::new(),
NewlineSeparator::AfterHeaderAndLine("\n"),
ContextSize::symmetrical(3),
),
)?;

insta::assert_snapshot!(actual, @r"");
Ok(())
}

#[derive(Default)]
struct Recorder {
#[allow(clippy::type_complexity)]
hunks: Vec<((u32, u32), (u32, u32), String)>,
}

impl ConsumeHunk for Recorder {
type Out = Vec<((u32, u32), (u32, u32), String)>;

fn consume_hunk(
&mut self,
before_hunk_start: u32,
before_hunk_len: u32,
after_hunk_start: u32,
after_hunk_len: u32,
header: &str,
_hunk: &[u8],
) -> std::io::Result<()> {
self.hunks.push((
(before_hunk_start, before_hunk_len),
(after_hunk_start, after_hunk_len),
header.to_string(),
));
Ok(())
}

fn finish(self) -> Self::Out {
self.hunks
}
}
Loading

0 comments on commit daa6d4a

Please sign in to comment.