@@ -67,7 +67,7 @@ use super::{
67
67
} ;
68
68
69
69
/// Regex pattern for parsing arrow file names.
70
- ///
70
+ ///
71
71
/// # Format
72
72
/// The expected format is: `<schema_key>.<front_part>.<file_id>.data.arrows`
73
73
/// where:
@@ -76,28 +76,28 @@ use super::{
76
76
/// - front_part: Captured for parquet file naming, contains the timestamp associated with current/time-partition
77
77
/// as well as the custom partitioning key=value pairs (e.g., "date=2020-01-21.hour=10.minute=30.key1=value1.key2=value2.ee529ffc8e76")
78
78
/// - file_id: Numeric id for individual arrows files
79
- ///
79
+ ///
80
80
/// # Limitations
81
81
/// - Partition keys and values must only contain alphanumeric characters
82
82
/// - Special characters in partition values will cause the pattern to fail in capturing
83
- ///
83
+ ///
84
84
/// # Examples
85
85
/// Valid: "key1=value1,key2=value2"
86
86
/// Invalid: "key1=special!value,key2=special#value"
87
87
static ARROWS_NAME_STRUCTURE : Lazy < Regex > = Lazy :: new ( || {
88
88
Regex :: new ( r"^[a-zA-Z0-9&=]+\.(?P<front>\S+)\.\d+\.data\.arrows$" ) . expect ( "Validated regex" )
89
89
} ) ;
90
90
91
+ /// Returns the filename for parquet if provided arrows file path is valid as per our expectation
91
92
fn arrow_path_to_parquet ( path : & Path , random_string : & str ) -> Option < PathBuf > {
92
93
let filename = path. file_name ( ) . unwrap ( ) . to_str ( ) . unwrap ( ) ;
93
94
let filename = ARROWS_NAME_STRUCTURE
94
95
. captures ( filename)
95
96
. and_then ( |c| c. get ( 1 ) ) ?
96
97
. as_str ( ) ;
97
- let filename_with_random_number = format ! ( "{filename}.data.{random_string}.arrows " ) ;
98
+ let filename_with_random_number = format ! ( "{filename}.data.{random_string}.parquet " ) ;
98
99
let mut parquet_path = path. to_owned ( ) ;
99
100
parquet_path. set_file_name ( filename_with_random_number) ;
100
- parquet_path. set_extension ( "parquet" ) ;
101
101
102
102
Some ( parquet_path)
103
103
}
0 commit comments