1919
2020use std:: collections:: HashMap ;
2121use std:: sync:: Arc ;
22- use std:: sync:: atomic:: AtomicI64 ;
2322
2423use arrow_schema:: SchemaRef as ArrowSchemaRef ;
2524use bytes:: Bytes ;
@@ -36,7 +35,6 @@ use parquet::thrift::{TCompactOutputProtocol, TSerializable};
3635use thrift:: protocol:: TOutputProtocol ;
3736
3837use super :: location_generator:: { FileNameGenerator , LocationGenerator } ;
39- use super :: track_writer:: TrackWriter ;
4038use super :: { FileWriter , FileWriterBuilder } ;
4139use crate :: arrow:: {
4240 ArrowFileReader , DEFAULT_MAP_FIELD_NAME , NanValueCountVisitor , get_parquet_stat_max_as_datum,
@@ -87,7 +85,6 @@ impl<T: LocationGenerator, F: FileNameGenerator> FileWriterBuilder for ParquetWr
8785 type R = ParquetWriter ;
8886
8987 async fn build ( self ) -> Result < Self :: R > {
90- let written_size = Arc :: new ( AtomicI64 :: new ( 0 ) ) ;
9188 let out_file = self . file_io . new_output (
9289 self . location_generator
9390 . generate_location ( & self . file_name_generator . generate_file_name ( ) ) ,
@@ -97,7 +94,6 @@ impl<T: LocationGenerator, F: FileNameGenerator> FileWriterBuilder for ParquetWr
9794 schema : self . schema . clone ( ) ,
9895 inner_writer : None ,
9996 writer_properties : self . props ,
100- written_size,
10197 current_row_num : 0 ,
10298 out_file,
10399 nan_value_count_visitor : NanValueCountVisitor :: new ( ) ,
@@ -227,11 +223,8 @@ impl SchemaVisitor for IndexByParquetPathName {
227223pub struct ParquetWriter {
228224 schema : SchemaRef ,
229225 out_file : OutputFile ,
230- inner_writer : Option < AsyncArrowWriter < AsyncFileWriter < TrackWriter > > > ,
226+ inner_writer : Option < AsyncArrowWriter < AsyncFileWriter < Box < dyn FileWrite > > > > ,
231227 writer_properties : WriterProperties ,
232- // written_size is only accurate after closing the inner writer,
233- // because the inner writer flushes data asynchronously.
234- written_size : Arc < AtomicI64 > ,
235228 current_row_num : usize ,
236229 nan_value_count_visitor : NanValueCountVisitor ,
237230}
@@ -534,8 +527,7 @@ impl FileWriter for ParquetWriter {
534527 writer
535528 } else {
536529 let arrow_schema: ArrowSchemaRef = Arc :: new ( self . schema . as_ref ( ) . try_into ( ) ?) ;
537- let inner_writer =
538- TrackWriter :: new ( self . out_file . writer ( ) . await ?, self . written_size . clone ( ) ) ;
530+ let inner_writer = self . out_file . writer ( ) . await ?;
539531 let async_writer = AsyncFileWriter :: new ( inner_writer) ;
540532 let writer = AsyncArrowWriter :: try_new (
541533 async_writer,
@@ -562,16 +554,16 @@ impl FileWriter for ParquetWriter {
562554 }
563555
564556 async fn close ( mut self ) -> Result < Vec < DataFileBuilder > > {
565- let writer = match self . inner_writer . take ( ) {
557+ let mut writer = match self . inner_writer . take ( ) {
566558 Some ( writer) => writer,
567559 None => return Ok ( vec ! [ ] ) ,
568560 } ;
569561
570- let metadata = writer. close ( ) . await . map_err ( |err| {
571- Error :: new ( ErrorKind :: Unexpected , "Failed to close parquet writer." ) . with_source ( err)
562+ let metadata = writer. finish ( ) . await . map_err ( |err| {
563+ Error :: new ( ErrorKind :: Unexpected , "Failed to finish parquet writer." ) . with_source ( err)
572564 } ) ?;
573565
574- let written_size = self . written_size . load ( std :: sync :: atomic :: Ordering :: Relaxed ) ;
566+ let written_size = writer . bytes_written ( ) ;
575567
576568 if self . current_row_num == 0 {
577569 self . out_file . delete ( ) . await . map_err ( |err| {
@@ -595,7 +587,7 @@ impl FileWriter for ParquetWriter {
595587 Ok ( vec ! [ Self :: parquet_to_data_file_builder(
596588 self . schema,
597589 parquet_metadata,
598- written_size as usize ,
590+ written_size,
599591 self . out_file. location( ) . to_string( ) ,
600592 self . nan_value_count_visitor. nan_value_counts,
601593 ) ?] )
0 commit comments