Skip to content

Commit 9c10041

Browse files
author
Devdutt Shenoi
committed
doc: arrow_path_to_parquet
1 parent 1a75118 commit 9c10041

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/parseable/streams.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ use super::{
6767
};
6868

6969
/// Regex pattern for parsing arrow file names.
70-
///
70+
///
7171
/// # Format
7272
/// The expected format is: `<schema_key>.<front_part>.<file_id>.data.arrows`
7373
/// where:
@@ -76,28 +76,28 @@ use super::{
7676
/// - front_part: Captured for parquet file naming, contains the timestamp associated with current/time-partition
7777
/// as well as the custom partitioning key=value pairs (e.g., "date=2020-01-21.hour=10.minute=30.key1=value1.key2=value2.ee529ffc8e76")
7878
/// - file_id: Numeric id for individual arrows files
79-
///
79+
///
8080
/// # Limitations
8181
/// - Partition keys and values must only contain alphanumeric characters
8282
/// - Special characters in partition values will cause the pattern to fail in capturing
83-
///
83+
///
8484
/// # Examples
8585
/// Valid: "key1=value1,key2=value2"
8686
/// Invalid: "key1=special!value,key2=special#value"
8787
static ARROWS_NAME_STRUCTURE: Lazy<Regex> = Lazy::new(|| {
8888
Regex::new(r"^[a-zA-Z0-9&=]+\.(?P<front>\S+)\.\d+\.data\.arrows$").expect("Validated regex")
8989
});
9090

91+
/// Returns the parquet file path if the provided arrows file path matches the expected naming format
9192
fn arrow_path_to_parquet(path: &Path, random_string: &str) -> Option<PathBuf> {
9293
let filename = path.file_name().unwrap().to_str().unwrap();
9394
let filename = ARROWS_NAME_STRUCTURE
9495
.captures(filename)
9596
.and_then(|c| c.get(1))?
9697
.as_str();
97-
let filename_with_random_number = format!("{filename}.data.{random_string}.arrows");
98+
let filename_with_random_number = format!("{filename}.data.{random_string}.parquet");
9899
let mut parquet_path = path.to_owned();
99100
parquet_path.set_file_name(filename_with_random_number);
100-
parquet_path.set_extension("parquet");
101101

102102
Some(parquet_path)
103103
}

0 commit comments

Comments
 (0)