Skip to content

Commit

Permalink
Merge pull request #8471 from sandflee/size2
Browse files Browse the repository at this point in the history
feat(hive): add filesize to avoid the stat operation in opendal
  • Loading branch information
BohuTANG authored Oct 26, 2022
2 parents dfb904b + e0f34b1 commit f7cf086
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/query/storages/hive/src/hive_file_splitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ impl HiveFileSplitter {
hive_file_info.filename.clone(),
hive_file_info.partition.clone(),
r,
hive_file_info.length,
)
})
.collect()
Expand Down
9 changes: 7 additions & 2 deletions src/query/storages/hive/src/hive_parquet_block_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,14 @@ impl HiveParquetBlockReader {
}
}

pub async fn read_meta_data(&self, dal: Operator, filename: &str) -> Result<Arc<FileMetaData>> {
pub async fn read_meta_data(
&self,
dal: Operator,
filename: &str,
filesize: u64,
) -> Result<Arc<FileMetaData>> {
let reader = FileMetaDataReader::new_reader(dal);
reader.read(filename, None, 0).await
reader.read(filename, Some(filesize), 0).await
}

pub async fn read_columns_data(
Expand Down
4 changes: 4 additions & 0 deletions src/query/storages/hive/src/hive_partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ pub struct HivePartInfo {
pub partitions: Option<String>,
// only the data within this range belongs to this partition
pub range: Range<u64>,
// total size of the file, passed to the metadata reader so it can avoid a stat call
pub filesize: u64,
}

#[typetag::serde(name = "hive")]
Expand All @@ -51,11 +53,13 @@ impl HivePartInfo {
filename: String,
partitions: Option<String>,
range: Range<u64>,
filesize: u64,
) -> Arc<Box<dyn PartInfo>> {
Arc::new(Box::new(HivePartInfo {
filename,
partitions,
range,
filesize,
}))
}

Expand Down
2 changes: 1 addition & 1 deletion src/query/storages/hive/src/hive_table_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ impl Processor for HiveTableSource {
let part = HivePartInfo::from_part(&part)?;
let file_meta = self
.block_reader
.read_meta_data(self.dal.clone(), &part.filename)
.read_meta_data(self.dal.clone(), &part.filename, part.filesize)
.await?;
let mut hive_blocks = HiveBlocks::create(file_meta, part.clone());
match hive_blocks.prune() {
Expand Down

1 comment on commit f7cf086

@vercel
Copy link

@vercel vercel bot commented on f7cf086 Oct 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-databend.vercel.app
databend.vercel.app
databend-git-main-databend.vercel.app
databend.rs

Please sign in to comment.