1+ //! Facilities to produce the unified diff format.
12//! Originally based on https://github.com/pascalkuthe/imara-diff/pull/14.
23//!
34
@@ -25,21 +26,50 @@ impl ContextSize {
2526 }
2627}
2728
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
///
/// An implementation receives one call to [`consume_hunk()`](ConsumeHunk::consume_hunk) per hunk,
/// followed by a single call to [`finish()`](ConsumeHunk::finish) to obtain the final output.
pub trait ConsumeHunk {
    /// The item this instance produces after consuming all hunks.
    type Out;

    /// Consume a single `hunk` in unified diff format, that would be prefixed with `header`.
    /// Note that `header` and every line in `hunk` already end with the newline separator,
    /// so implementations do not have to add any newlines themselves.
    ///
    /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`].
    /// After this method has returned its first error, it will not be called anymore.
    ///
    /// The following is hunk-related information and the same that is used in the `header`.
    /// * `before_hunk_start` is the 1-based first line of this hunk in the old file.
    /// * `before_hunk_len` is the amount of lines of this hunk in the old file.
    /// * `after_hunk_start` is the 1-based first line of this hunk in the new file.
    /// * `after_hunk_len` is the amount of lines of this hunk in the new file.
    ///
    /// # Errors
    ///
    /// Implementations return an error to signal that the hunk could not be consumed.
    fn consume_hunk(
        &mut self,
        before_hunk_start: u32,
        before_hunk_len: u32,
        after_hunk_start: u32,
        after_hunk_len: u32,
        header: &str,
        hunk: &[u8],
    ) -> std::io::Result<()>;

    /// Called after the last hunk is consumed to produce an output.
    fn finish(self) -> Self::Out;
}
57+
2858pub ( super ) mod _impl {
59+ use super :: { ConsumeHunk , ContextSize } ;
60+ use bstr:: { ByteSlice , ByteVec } ;
2961 use imara_diff:: { intern, Sink } ;
30- use std :: fmt :: { Display , Write } ;
62+ use intern :: { InternedInput , Interner , Token } ;
3163 use std:: hash:: Hash ;
64+ use std:: io:: ErrorKind ;
3265 use std:: ops:: Range ;
3366
34- use super :: ContextSize ;
35- use intern:: { InternedInput , Interner , Token } ;
36-
37- /// A [`Sink`] that creates a textual diff
38- /// in the format typically output by git or gnu-diff if the `-u` option is used
39- pub struct UnifiedDiff < ' a , W , T >
67+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
68+ /// and passes it in full to a consumer.
69+ pub struct UnifiedDiff < ' a , T , D >
4070 where
41- W : Write ,
42- T : Hash + Eq + Display ,
71+ T : Hash + Eq + AsRef < [ u8 ] > ,
72+ D : ConsumeHunk ,
4373 {
4474 before : & ' a [ Token ] ,
4575 after : & ' a [ Token ] ,
@@ -53,85 +83,91 @@ pub(super) mod _impl {
5383 /// Symmetrical context before and after the changed hunk.
5484 ctx_size : u32 ,
5585
56- buffer : String ,
57- dst : W ,
86+ buffer : Vec < u8 > ,
87+ header_buf : String ,
88+ delegate : D ,
89+ newline : & ' a str ,
90+
91+ err : Option < std:: io:: Error > ,
5892 }
5993
60- impl < ' a , T > UnifiedDiff < ' a , String , T >
94+ impl < ' a , T , D > UnifiedDiff < ' a , T , D >
6195 where
62- T : Hash + Eq + Display ,
96+ T : Hash + Eq + AsRef < [ u8 ] > ,
97+ D : ConsumeHunk ,
6398 {
6499 /// Create a new `UnifiedDiffBuilder` for the given `input`,
65100 /// displaying `context_size` lines of context around each change,
66- /// that will return a [`String`].
67- pub fn new ( input : & ' a InternedInput < T > , context_size : ContextSize ) -> Self {
101+ /// that will write it output to the provided implementation of [`Write`].
102+ ///
103+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`,
104+ pub fn new (
105+ input : & ' a InternedInput < T > ,
106+ consume_hunk : D ,
107+ newline_separator : & ' a str ,
108+ context_size : ContextSize ,
109+ ) -> Self {
68110 Self {
69111 before_hunk_start : 0 ,
70112 after_hunk_start : 0 ,
71113 before_hunk_len : 0 ,
72114 after_hunk_len : 0 ,
73- buffer : String :: with_capacity ( 8 ) ,
74- dst : String :: new ( ) ,
115+ buffer : Vec :: with_capacity ( 8 ) ,
116+ header_buf : String :: new ( ) ,
117+ delegate : consume_hunk,
75118 interner : & input. interner ,
76119 before : & input. before ,
77120 after : & input. after ,
78121 pos : 0 ,
79122 ctx_size : context_size. symmetrical ,
80- }
81- }
82- }
123+ newline : newline_separator,
83124
84- impl < ' a , W , T > UnifiedDiff < ' a , W , T >
85- where
86- W : Write ,
87- T : Hash + Eq + Display ,
88- {
89- /// Create a new `UnifiedDiffBuilder` for the given `input`,
90- /// displaying `context_size` lines of context around each change,
91- /// that will writes it output to the provided implementation of [`Write`].
92- pub fn with_writer ( input : & ' a InternedInput < T > , writer : W , context_size : Option < u32 > ) -> Self {
93- Self {
94- before_hunk_start : 0 ,
95- after_hunk_start : 0 ,
96- before_hunk_len : 0 ,
97- after_hunk_len : 0 ,
98- buffer : String :: with_capacity ( 8 ) ,
99- dst : writer,
100- interner : & input. interner ,
101- before : & input. before ,
102- after : & input. after ,
103- pos : 0 ,
104- ctx_size : context_size. unwrap_or ( 3 ) ,
125+ err : None ,
105126 }
106127 }
107128
108129 fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
109130 for & token in tokens {
110- writeln ! ( & mut self . buffer, "{prefix}{}" , self . interner[ token] ) . unwrap ( ) ;
131+ self . buffer . push_char ( prefix) ;
132+ self . buffer . push_str ( & self . interner [ token] ) ;
133+ self . buffer . push_str ( self . newline . as_bytes ( ) ) ;
111134 }
112135 }
113136
114- fn flush ( & mut self ) {
137+ fn flush ( & mut self ) -> std :: io :: Result < ( ) > {
115138 if self . before_hunk_len == 0 && self . after_hunk_len == 0 {
116- return ;
139+ return Ok ( ( ) ) ;
117140 }
118141
119142 let end = ( self . pos + self . ctx_size ) . min ( self . before . len ( ) as u32 ) ;
120143 self . update_pos ( end, end) ;
121144
122- writeln ! (
123- & mut self . dst,
124- "@@ -{},{} +{},{} @@" ,
145+ self . header_buf . clear ( ) ;
146+
147+ std:: fmt:: Write :: write_fmt (
148+ & mut self . header_buf ,
149+ format_args ! (
150+ "@@ -{},{} +{},{} @@{nl}" ,
151+ self . before_hunk_start + 1 ,
152+ self . before_hunk_len,
153+ self . after_hunk_start + 1 ,
154+ self . after_hunk_len,
155+ nl = self . newline
156+ ) ,
157+ )
158+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
159+ self . delegate . consume_hunk (
125160 self . before_hunk_start + 1 ,
126161 self . before_hunk_len ,
127162 self . after_hunk_start + 1 ,
128163 self . after_hunk_len ,
129- )
130- . unwrap ( ) ;
131- write ! ( & mut self . dst , "{}" , & self . buffer ) . unwrap ( ) ;
164+ & self . header_buf ,
165+ & self . buffer ,
166+ ) ? ;
132167 self . buffer . clear ( ) ;
133168 self . before_hunk_len = 0 ;
134- self . after_hunk_len = 0
169+ self . after_hunk_len = 0 ;
170+ Ok ( ( ) )
135171 }
136172
137173 fn update_pos ( & mut self , print_to : u32 , move_to : u32 ) {
@@ -143,18 +179,24 @@ pub(super) mod _impl {
143179 }
144180 }
145181
146- impl < W , T > Sink for UnifiedDiff < ' _ , W , T >
182+ impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
147183 where
148- W : Write ,
149- T : Hash + Eq + Display ,
184+ T : Hash + Eq + AsRef < [ u8 ] > ,
185+ D : ConsumeHunk ,
150186 {
151- type Out = W ;
187+ type Out = std :: io :: Result < D :: Out > ;
152188
153189 fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
190+ if self . err . is_some ( ) {
191+ return ;
192+ }
154193 if ( ( self . pos == 0 ) && ( before. start - self . pos > self . ctx_size ) )
155194 || ( before. start - self . pos > 2 * self . ctx_size )
156195 {
157- self . flush ( ) ;
196+ if let Err ( err) = self . flush ( ) {
197+ self . err = Some ( err) ;
198+ return ;
199+ }
158200 self . pos = before. start - self . ctx_size ;
159201 self . before_hunk_start = self . pos ;
160202 self . after_hunk_start = after. start - self . ctx_size ;
@@ -167,8 +209,46 @@ pub(super) mod _impl {
167209 }
168210
169211 fn finish ( mut self ) -> Self :: Out {
170- self . flush ( ) ;
171- self . dst
212+ if let Err ( err) = self . flush ( ) {
213+ self . err = Some ( err) ;
214+ }
215+ if let Some ( err) = self . err {
216+ return Err ( err) ;
217+ }
218+ Ok ( self . delegate . finish ( ) )
219+ }
220+ }
221+
222+ /// An implementation that fails if the input isn't UTF-8.
223+ impl ConsumeHunk for String {
224+ type Out = Self ;
225+
226+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
227+ self . push_str ( header) ;
228+ self . push_str (
229+ hunk. to_str ( )
230+ . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?,
231+ ) ;
232+ Ok ( ( ) )
233+ }
234+
235+ fn finish ( self ) -> Self :: Out {
236+ self
237+ }
238+ }
239+
240+ /// An implementation that writes hunks into a byte buffer.
241+ impl ConsumeHunk for Vec < u8 > {
242+ type Out = Self ;
243+
244+ fn consume_hunk ( & mut self , _: u32 , _: u32 , _: u32 , _: u32 , header : & str , hunk : & [ u8 ] ) -> std:: io:: Result < ( ) > {
245+ self . push_str ( header) ;
246+ self . push_str ( hunk) ;
247+ Ok ( ( ) )
248+ }
249+
250+ fn finish ( self ) -> Self :: Out {
251+ self
172252 }
173253 }
174254}
0 commit comments