@@ -115,7 +115,6 @@ pub trait EventFormat: Sized {
115
115
/// Converts the event (`self`, taken by value) into the format-specific
/// `Self::Data` payload, together with an `EventSchema` and a `bool` flag.
///
/// * `schema` - map from `String` keys to `Arc<Field>`; the visible caller in
///   this trait passes its `storage_schema` argument here.
/// * `schema_version` - the `SchemaVersion` forwarded unchanged by the caller.
/// * `static_schema_flag` - boolean forwarded unchanged by the caller.
///
/// The visible caller destructures the `Ok` value as `(data, schema, is_first)`.
/// NOTE(review): the exact meaning of the trailing `bool` is not shown in this
/// view; the `is_first` name comes from the caller only -- confirm against the
/// implementors.
///
/// NOTE(review): the line marked `-` below is the `time_partition` parameter
/// that this change removes from the signature; implementors and callers must
/// drop that argument as well.
fn to_data (
116
116
self ,
117
117
schema : & HashMap < String , Arc < Field > > ,
118
- time_partition : Option < & String > ,
119
118
schema_version : SchemaVersion ,
120
119
static_schema_flag : bool ,
121
120
) -> Result < ( Self :: Data , EventSchema , bool ) , AnyError > ;
@@ -134,12 +133,8 @@ pub trait EventFormat: Sized {
134
133
p_custom_fields : & HashMap < String , String > ,
135
134
) -> Result < ( RecordBatch , bool ) , AnyError > {
136
135
let p_timestamp = self . get_p_timestamp ( ) ;
137
- let ( data, schema, is_first) = self . to_data (
138
- storage_schema,
139
- time_partition,
140
- schema_version,
141
- static_schema_flag,
142
- ) ?;
136
+ let ( data, schema, is_first) =
137
+ self . to_data ( storage_schema, schema_version, static_schema_flag) ?;
143
138
144
139
if get_field ( & schema, DEFAULT_TIMESTAMP_KEY ) . is_some ( ) {
145
140
return Err ( anyhow ! (
@@ -149,21 +144,22 @@ pub trait EventFormat: Sized {
149
144
} ;
150
145
151
146
// prepare the record batch and new fields to be added
152
- let mut new_schema = Arc :: new ( Schema :: new ( schema) ) ;
153
- if !Self :: is_schema_matching ( new_schema. clone ( ) , storage_schema, static_schema_flag) {
147
+ let mut new_schema = Schema :: new ( schema) ;
148
+ if !Self :: is_schema_matching ( & new_schema, storage_schema, static_schema_flag) {
154
149
return Err ( anyhow ! ( "Schema mismatch" ) ) ;
155
150
}
156
- new_schema = update_field_type_in_schema ( new_schema, None , time_partition) ;
157
151
158
- let rb = Self :: decode ( data, new_schema. clone ( ) ) ?;
152
+ update_field_type_in_schema ( & mut new_schema, Some ( storage_schema) , time_partition) ;
153
+ let updated_schema = Arc :: new ( new_schema) ;
159
154
155
+ let rb = Self :: decode ( data, updated_schema) ?;
160
156
let rb = add_parseable_fields ( rb, p_timestamp, p_custom_fields) ?;
161
157
162
158
Ok ( ( rb, is_first) )
163
159
}
164
160
165
161
fn is_schema_matching (
166
- new_schema : Arc < Schema > ,
162
+ new_schema : & Schema ,
167
163
storage_schema : & HashMap < String , Arc < Field > > ,
168
164
static_schema_flag : bool ,
169
165
) -> bool {
@@ -200,7 +196,7 @@ pub trait EventFormat: Sized {
200
196
}
201
197
202
198
pub fn get_existing_field_names (
203
- inferred_schema : Arc < Schema > ,
199
+ inferred_schema : & Schema ,
204
200
existing_schema : Option < & HashMap < String , Arc < Field > > > ,
205
201
) -> HashSet < String > {
206
202
let mut existing_field_names = HashSet :: new ( ) ;
@@ -219,8 +215,8 @@ pub fn get_existing_field_names(
219
215
220
216
pub fn override_existing_timestamp_fields (
221
217
existing_schema : & HashMap < String , Arc < Field > > ,
222
- inferred_schema : Arc < Schema > ,
223
- ) -> Arc < Schema > {
218
+ inferred_schema : & mut Schema ,
219
+ ) {
224
220
let timestamp_field_names: HashSet < String > = existing_schema
225
221
. values ( )
226
222
. filter_map ( |field| {
@@ -231,7 +227,8 @@ pub fn override_existing_timestamp_fields(
231
227
}
232
228
} )
233
229
. collect ( ) ;
234
- let updated_fields: Vec < Arc < Field > > = inferred_schema
230
+
231
+ inferred_schema. fields = inferred_schema
235
232
. fields ( )
236
233
. iter ( )
237
234
. map ( |field| {
@@ -246,28 +243,24 @@ pub fn override_existing_timestamp_fields(
246
243
}
247
244
} )
248
245
. collect ( ) ;
249
-
250
- Arc :: new ( Schema :: new ( updated_fields) )
251
246
}
252
247
253
248
pub fn update_field_type_in_schema (
254
- inferred_schema : Arc < Schema > ,
249
+ inferred_schema : & mut Schema ,
255
250
existing_schema : Option < & HashMap < String , Arc < Field > > > ,
256
251
time_partition : Option < & String > ,
257
- ) -> Arc < Schema > {
258
- let mut updated_schema = inferred_schema. clone ( ) ;
259
- let existing_field_names = get_existing_field_names ( inferred_schema. clone ( ) , existing_schema) ;
260
-
252
+ ) {
253
+ let existing_field_names = get_existing_field_names ( inferred_schema, existing_schema) ;
261
254
if let Some ( existing_schema) = existing_schema {
262
255
// overriding known timestamp fields which were inferred as string fields
263
- updated_schema = override_existing_timestamp_fields ( existing_schema, updated_schema ) ;
256
+ override_existing_timestamp_fields ( existing_schema, inferred_schema ) ;
264
257
}
265
258
266
259
let Some ( time_partition) = time_partition else {
267
- return updated_schema ;
260
+ return ;
268
261
} ;
269
262
270
- let new_schema : Vec < Field > = updated_schema
263
+ inferred_schema . fields = inferred_schema
271
264
. fields ( )
272
265
. iter ( )
273
266
. map ( |field| {
@@ -283,5 +276,4 @@ pub fn update_field_type_in_schema(
283
276
}
284
277
} )
285
278
. collect ( ) ;
286
- Arc :: new ( Schema :: new ( new_schema) )
287
279
}
0 commit comments