@@ -86,52 +86,118 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
86
86
type Output = String ;
87
87
88
88
fn concat ( & self ) -> String {
89
- if self . is_empty ( ) {
90
- return String :: new ( ) ;
91
- }
92
-
93
- // `len` calculation may overflow but push_str will check boundaries
94
- let len = self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum ( ) ;
95
- let mut result = String :: with_capacity ( len) ;
96
-
97
- for s in self {
98
- result. push_str ( s. borrow ( ) )
99
- }
100
-
101
- result
89
+ self . join ( "" )
102
90
}
103
91
104
92
fn join ( & self , sep : & str ) -> String {
105
- if self . is_empty ( ) {
106
- return String :: new ( ) ;
93
+ unsafe {
94
+ String :: from_utf8_unchecked ( join_generic_copy ( self , sep . as_bytes ( ) ) )
107
95
}
96
+ }
108
97
109
- // concat is faster
110
- if sep . is_empty ( ) {
111
- return self . concat ( ) ;
112
- }
98
+ fn connect ( & self , sep : & str ) -> String {
99
+ self . join ( sep )
100
+ }
101
+ }
113
102
114
- // this is wrong without the guarantee that `self` is non-empty
115
- // `len` calculation may overflow but push_str but will check boundaries
116
- let len = sep. len ( ) * ( self . len ( ) - 1 ) +
117
- self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum :: < usize > ( ) ;
118
- let mut result = String :: with_capacity ( len) ;
119
- let mut first = true ;
103
// Copies all of `$src` (a `&[T]`) to the write cursor `$dst` (a `*mut T` local) and moves
// the cursor past the copied elements. `$remaining` (a `usize` local) is the number of
// elements that may still be written at `$dst`; it is checked and then decremented.
//
// Panics instead of writing out of bounds when `$src` does not fit.
//
// Must be expanded inside an `unsafe` block. The caller guarantees that `$dst` is valid
// for writes of `$remaining` elements and that `$src` does not overlap that region.
macro_rules! copy_slice_and_advance {
    ($dst:ident, $remaining:ident, $src:expr) => {
        let src = $src;
        let len = src.len();
        // The buffer was sized from an earlier pass over the input. A `Borrow` impl is
        // free to hand out a different slice on every call, so the size has to be
        // re-validated here; this check is what keeps the raw copy below in bounds.
        assert!(
            len <= $remaining,
            "attempt to join slices whose lengths changed between calls to `borrow`"
        );
        $dst.copy_from_nonoverlapping(src.as_ptr(), len);
        $dst = $dst.add(len);
        $remaining -= len;
    };
}

// Writes `separator, element` for every element yielded by `$iter`, starting at the raw
// write cursor `$dst`, which must be valid for writes of `$capacity` elements.
//
// Evaluates to the number of elements of that capacity that were left unwritten.
//
// The listed `$num` separator lengths each get their own copy loop: loops with a
// hardcoded separator size run much faster, so the small lengths are specialized.
//
// Must be expanded inside an `unsafe` block (see `copy_slice_and_advance!`).
macro_rules! spezialize_for_lengths {
    ($separator:expr, $dst:expr, $capacity:expr, $iter:expr; $($num:expr),*) => {{
        let sep = $separator;
        let mut dst = $dst;
        let mut remaining = $capacity;
        let iter = $iter;
        match sep.len() {
            $(
                // in this arm `sep.len()` is the constant `$num`
                $num => {
                    for s in iter {
                        copy_slice_and_advance!(dst, remaining, sep);
                        let content = s.borrow().as_ref();
                        copy_slice_and_advance!(dst, remaining, content);
                    }
                },
            )*
            0 => {
                // concat, same principle without the separator
                for s in iter {
                    let content = s.borrow().as_ref();
                    copy_slice_and_advance!(dst, remaining, content);
                }
            },
            _ => {
                // arbitrary non-zero size fallback
                for s in iter {
                    copy_slice_and_advance!(dst, remaining, sep);
                    let content = s.borrow().as_ref();
                    copy_slice_and_advance!(dst, remaining, content);
                }
            }
        }
        remaining
    }};
}

// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() and we always have slices
//
// Returns the elements of `slice` copied into one `Vec`, with a copy of `sep` between each
// pair of adjacent elements. An empty `slice` yields an empty `Vec`.
//
// Panics if the total length overflows `usize`, or if an element's `Borrow` implementation
// returns a longer slice while copying than it did while the length was being computed.
// If it returns a shorter slice, the result is simply shorter; uninitialized memory is
// never exposed.
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
    T: Copy,
    B: AsRef<[T]> + ?Sized,
    S: Borrow<B>,
{
    let mut iter = slice.iter();

    // the first element is the only one that is not preceded by a separator
    let first = match iter.next() {
        Some(first) => first,
        None => return Vec::new(),
    };

    // Exact length of the result: one separator per remaining element plus all contents.
    // if the `len` calculation overflows, we'll panic
    // we would have run out of memory anyway and the rest of the function requires
    // the entire result pre-allocated
    let reserved_len = sep
        .len()
        .checked_mul(iter.len())
        .and_then(|n| {
            slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
        })
        .expect("attempt to join into collection with len > usize::MAX");

    let mut result: Vec<T> = Vec::with_capacity(reserved_len);
    result.extend_from_slice(first.borrow().as_ref());

    // Bound all raw writes by what the vector really owns past its current length, not by
    // `reserved_len`: `first` may have borrowed differently this time and grown the vector.
    let pos = result.len();
    let spare = result.capacity() - pos;

    // SAFETY:
    // * `pos <= capacity`, so `as_mut_ptr().add(pos)` stays inside (or one past the end of)
    //   the allocation, and the `spare` elements from there on belong to `result`.
    // * every copy goes through `copy_slice_and_advance!`, which panics before it would
    //   write more than `spare` elements in total; the sources are shared borrows of the
    //   inputs and cannot overlap the vector's exclusively owned spare capacity.
    // * `spare - unwritten` elements after `pos` have been initialized by those copies, so
    //   the new length covers only initialized elements and does not exceed the capacity.
    unsafe {
        let dst = result.as_mut_ptr().add(pos);

        // copy separator and contents over with a single bounds check per slice
        // generate loops with hardcoded separator sizes for small separators
        // massive improvements possible (~ x2)
        let unwritten = spezialize_for_lengths!(sep, dst, spare, iter; 1, 2, 3, 4);

        result.set_len(pos + (spare - unwritten));
    }
    result
}
136
202
137
203
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
0 commit comments