@@ -29,6 +29,7 @@ use arrow_ipc::writer::FileWriter;
29
29
use arrow_schema:: Schema ;
30
30
use arrow_select:: concat:: concat_batches;
31
31
use itertools:: Itertools ;
32
+ use tracing:: trace;
32
33
33
34
use crate :: parseable:: { ARROW_FILE_EXTENSION , ARROW_PART_FILE_EXTENSION } ;
34
35
use crate :: utils:: arrow:: adapt_batch;
@@ -40,17 +41,12 @@ pub struct DiskWriter<const N: usize> {
40
41
inner : FileWriter < BufWriter < File > > ,
41
42
/// Used to ensure un"finish"ed arrow files are renamed on "finish"
42
43
path_prefix : String ,
43
- /// Number of rows written onto disk
44
- count : usize ,
45
- /// Denotes distinct files created with similar schema during the same minute by the same ingestor
46
- file_id : usize ,
47
44
}
48
45
49
46
impl < const N : usize > DiskWriter < N > {
50
47
pub fn new ( path_prefix : String , schema : & Schema ) -> Result < Self , StagingError > {
51
- let file_id = 0 ;
52
48
// Live writes happen into partfile
53
- let partfile_path = format ! ( "{path_prefix}.{file_id}.{ ARROW_PART_FILE_EXTENSION}" ) ;
49
+ let partfile_path = format ! ( "{path_prefix}.{ARROW_PART_FILE_EXTENSION}" ) ;
54
50
let file = OpenOptions :: new ( )
55
51
. create ( true )
56
52
. append ( true )
@@ -60,29 +56,12 @@ impl<const N: usize> DiskWriter<N> {
60
56
inner : FileWriter :: try_new_buffered ( file, schema)
61
57
. expect ( "File and RecordBatch both are checked" ) ,
62
58
path_prefix,
63
- count : 0 ,
64
- file_id,
65
59
} )
66
60
}
67
61
68
- /// Appends records into an `{file_id}.part.arrows` files,
69
- /// flushing onto disk and increments count on breaching row limit.
62
+ /// Appends records into a `.part.arrows` file
70
63
pub fn write ( & mut self , rb : & RecordBatch ) -> Result < ( ) , StagingError > {
71
- if self . count + rb. num_rows ( ) >= N {
72
- let left = N - self . count ;
73
- let left_slice = rb. slice ( 0 , left) ;
74
- self . inner . write ( & left_slice) ?;
75
- self . finish ( ) ?;
76
-
77
- // Write leftover records into new files until all have been written
78
- if left < rb. num_rows ( ) {
79
- let right = rb. num_rows ( ) - left;
80
- self . write ( & rb. slice ( left, right) ) ?;
81
- }
82
- } else {
83
- self . inner . write ( rb) ?;
84
- self . count += rb. num_rows ( ) ;
85
- }
64
+ self . inner . write ( rb) ?;
86
65
87
66
Ok ( ( ) )
88
67
}
@@ -91,32 +70,12 @@ impl<const N: usize> DiskWriter<N> {
91
70
pub fn finish ( & mut self ) -> Result < ( ) , StagingError > {
92
71
self . inner . finish ( ) ?;
93
72
94
- let partfile_path = format ! (
95
- "{}.{}.{ARROW_PART_FILE_EXTENSION}" ,
96
- self . path_prefix, self . file_id
97
- ) ;
98
- let arrows_path = format ! (
99
- "{}.{}.{ARROW_FILE_EXTENSION}" ,
100
- self . path_prefix, self . file_id
101
- ) ;
73
+ let partfile_path = format ! ( "{}.{ARROW_PART_FILE_EXTENSION}" , self . path_prefix) ;
74
+ let arrows_path = format ! ( "{}.{ARROW_FILE_EXTENSION}" , self . path_prefix) ;
102
75
103
76
// Rename from part file to finished arrows file
104
- std:: fs:: rename ( partfile_path, arrows_path) ?;
105
-
106
- self . file_id += 1 ;
107
- self . count = 0 ;
108
-
109
- let partfile_path = format ! (
110
- "{}.{}.{ARROW_PART_FILE_EXTENSION}" ,
111
- self . path_prefix, self . file_id
112
- ) ;
113
- let file = OpenOptions :: new ( )
114
- . create ( true )
115
- . append ( true )
116
- . open ( partfile_path) ?;
117
-
118
- self . inner = FileWriter :: try_new_buffered ( file, self . inner . schema ( ) )
119
- . expect ( "File and RecordBatch both are checked" ) ;
77
+ std:: fs:: rename ( partfile_path, & arrows_path) ?;
78
+ trace ! ( "Finished arrows file: {arrows_path}" ) ;
120
79
121
80
Ok ( ( ) )
122
81
}
0 commit comments