diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 3b84022bdea..c6a0f3148d6 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -11,6 +11,10 @@ pub mod pipeline; /// pub mod platform; +/// +pub mod unified_diff; +pub use unified_diff::_impl::UnifiedDiff; + /// Information about the diff performed to detect similarity. #[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd)] pub struct DiffLineStats { diff --git a/gix-diff/src/blob/unified_diff.rs b/gix-diff/src/blob/unified_diff.rs new file mode 100644 index 00000000000..78d1f3aae79 --- /dev/null +++ b/gix-diff/src/blob/unified_diff.rs @@ -0,0 +1,174 @@ +//! Originally based on <https://github.com/pascalkuthe/imara-diff/pull/14>. +//! + +/// Defines the size of the context printed before and after each change. +/// +/// Similar to the `-U` option in git diff or gnu-diff. If the context overlaps +/// with the previous or next change, the context gets reduced accordingly. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct ContextSize { + /// The number of context lines printed both before and after each change. + symmetrical: u32, +} + +impl Default for ContextSize { + fn default() -> Self { + ContextSize::symmetrical(3) + } +} + +/// Instantiation +impl ContextSize { + /// Create a symmetrical context with `n` lines before and after a changed hunk. 
+ pub fn symmetrical(n: u32) -> Self { + ContextSize { symmetrical: n } + } +} + +pub(super) mod _impl { + use imara_diff::{intern, Sink}; + use std::fmt::{Display, Write}; + use std::hash::Hash; + use std::ops::Range; + + use super::ContextSize; + use intern::{InternedInput, Interner, Token}; + + /// A [`Sink`] that creates a textual diff + /// in the format typically output by git or gnu-diff if the `-u` option is used. + pub struct UnifiedDiff<'a, W, T> + where + W: Write, + T: Hash + Eq + Display, + { + before: &'a [Token], + after: &'a [Token], + interner: &'a Interner, + + pos: u32, + before_hunk_start: u32, + after_hunk_start: u32, + before_hunk_len: u32, + after_hunk_len: u32, + /// Number of context lines printed before and after each changed hunk. + ctx_size: u32, + + buffer: String, + dst: W, + } + + impl<'a, T> UnifiedDiff<'a, String, T> + where + T: Hash + Eq + Display, + { + /// Create a new `UnifiedDiff` for the given `input`, + /// displaying `context_size` lines of context around each change, + /// that collects its output into a [`String`]. + pub fn new(input: &'a InternedInput, context_size: ContextSize) -> Self { + Self { + before_hunk_start: 0, + after_hunk_start: 0, + before_hunk_len: 0, + after_hunk_len: 0, + buffer: String::with_capacity(8), + dst: String::new(), + interner: &input.interner, + before: &input.before, + after: &input.after, + pos: 0, + ctx_size: context_size.symmetrical, + } + } + } + + impl<'a, W, T> UnifiedDiff<'a, W, T> + where + W: Write, + T: Hash + Eq + Display, + { + /// Create a new `UnifiedDiff` for the given `input`, + /// displaying `context_size` lines of context around each change (3 if `None`), + /// that will write its output to the provided implementation of [`Write`]. 
+ pub fn with_writer(input: &'a InternedInput, writer: W, context_size: Option) -> Self { + Self { + before_hunk_start: 0, + after_hunk_start: 0, + before_hunk_len: 0, + after_hunk_len: 0, + buffer: String::with_capacity(8), + dst: writer, + interner: &input.interner, + before: &input.before, + after: &input.after, + pos: 0, + ctx_size: context_size.unwrap_or(3), + } + } + + fn print_tokens(&mut self, tokens: &[Token], prefix: char) { + for &token in tokens { + writeln!(&mut self.buffer, "{prefix}{}", self.interner[token]).unwrap(); + } + } + + fn flush(&mut self) { + if self.before_hunk_len == 0 && self.after_hunk_len == 0 { + return; + } + + let end = (self.pos + self.ctx_size).min(self.before.len() as u32); + self.update_pos(end, end); + + writeln!( + &mut self.dst, + "@@ -{},{} +{},{} @@", + self.before_hunk_start + 1, + self.before_hunk_len, + self.after_hunk_start + 1, + self.after_hunk_len, + ) + .unwrap(); + write!(&mut self.dst, "{}", &self.buffer).unwrap(); + self.buffer.clear(); + self.before_hunk_len = 0; + self.after_hunk_len = 0 + } + + fn update_pos(&mut self, print_to: u32, move_to: u32) { + self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' '); + let len = print_to - self.pos; + self.pos = move_to; + self.before_hunk_len += len; + self.after_hunk_len += len; + } + } + + impl Sink for UnifiedDiff<'_, W, T> + where + W: Write, + T: Hash + Eq + Display, + { + type Out = W; + + fn process_change(&mut self, before: Range, after: Range) { + if ((self.pos == 0) && (before.start - self.pos > self.ctx_size)) + || (before.start - self.pos > 2 * self.ctx_size) + { + self.flush(); + self.pos = before.start - self.ctx_size; + self.before_hunk_start = self.pos; + self.after_hunk_start = after.start - self.ctx_size; + } + self.update_pos(before.start, before.end); + self.before_hunk_len += before.end - before.start; + self.after_hunk_len += after.end - after.start; + self.print_tokens(&self.before[before.start as usize..before.end as usize], 
'-'); + self.print_tokens(&self.after[after.start as usize..after.end as usize], '+'); + } + + fn finish(mut self) -> Self::Out { + self.flush(); + self.dst + } + } +} diff --git a/gix-diff/tests/diff/blob/mod.rs b/gix-diff/tests/diff/blob/mod.rs index bb412d412cf..1959c4e6fdb 100644 --- a/gix-diff/tests/diff/blob/mod.rs +++ b/gix-diff/tests/diff/blob/mod.rs @@ -1,2 +1,3 @@ pub(crate) mod pipeline; mod platform; +mod unified_diff; diff --git a/gix-diff/tests/diff/blob/unified_diff.rs b/gix-diff/tests/diff/blob/unified_diff.rs new file mode 100644 index 00000000000..e69de29bb2d