-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22 from WenyXu/feat/support-tiny-int
feat: support to read tinyint
- Loading branch information
Showing 9 changed files with 178 additions and 22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
use snafu::OptionExt; | ||
|
||
use crate::arrow_reader::column::present::new_present_iter; | ||
use crate::arrow_reader::column::{Column, NullableIterator}; | ||
use crate::error::{InvalidColumnSnafu, Result}; | ||
use crate::proto::stream::Kind; | ||
use crate::reader::decode::byte_rle::ByteRleIter; | ||
|
||
pub fn new_i8_iter(column: &Column) -> Result<NullableIterator<i8>> { | ||
let present = new_present_iter(column)?.collect::<Result<Vec<_>>>()?; | ||
let rows: usize = present.iter().filter(|&p| *p).count(); | ||
|
||
let iter = column | ||
.stream(Kind::Data) | ||
.transpose()? | ||
.map(|reader| { | ||
Box::new(ByteRleIter::new(reader, rows).map(|value| value.map(|value| value as i8))) | ||
as _ | ||
}) | ||
.context(InvalidColumnSnafu { name: &column.name })?; | ||
|
||
Ok(NullableIterator { | ||
present: Box::new(present.into_iter()), | ||
iter, | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
pub mod boolean_rle; | ||
pub mod byte_rle; | ||
pub mod float; | ||
pub mod rle_v2; | ||
mod util; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
use crate::error::Result; | ||
use std::io::Read; | ||
|
||
use super::util::read_u8; | ||
|
||
/// Added to the header byte of a repeat run to obtain the run length.
const MIN_REPEAT_SIZE: usize = 3;
/// Capacity of the literal buffer held by [`ByteRleIter`].
const MAX_LITERAL_SIZE: usize = 128;

/// Iterator state for decoding byte run-length-encoded data from a reader.
pub struct ByteRleIter<R>
where
    R: Read,
{
    /// Source the encoded bytes are read from.
    reader: R,
    /// Number of values the iterator will still yield.
    remaining: usize,
    /// Whether the current run repeats a single value (kept in `literals[0]`).
    repeat: bool,
    /// Number of values in the current run.
    num_literals: usize,
    /// Number of values of the current run that have already been yielded.
    used: usize,
    /// Buffer holding the values of the current run.
    literals: [u8; MAX_LITERAL_SIZE],
}
|
||
impl<R: Read> ByteRleIter<R> { | ||
pub fn new(reader: R, length: usize) -> Self { | ||
Self { | ||
reader, | ||
literals: [0u8; MAX_LITERAL_SIZE], | ||
num_literals: 0, | ||
used: 0, | ||
repeat: false, | ||
remaining: length, | ||
} | ||
} | ||
|
||
pub fn into_inner(self) -> R { | ||
self.reader | ||
} | ||
|
||
fn read_byte(&mut self) -> Result<u8> { | ||
read_u8(&mut self.reader) | ||
} | ||
|
||
fn read_values(&mut self) -> Result<()> { | ||
let control = self.read_byte()?; | ||
self.used = 0; | ||
if control < 0x80 { | ||
self.repeat = true; | ||
self.num_literals = control as usize + MIN_REPEAT_SIZE; | ||
let val = self.read_byte()?; | ||
self.literals[0] = val; | ||
} else { | ||
self.repeat = false; | ||
self.num_literals = 0x100 - control as usize; | ||
for i in 0..self.num_literals { | ||
let result = self.read_byte()?; | ||
self.literals[i] = result; | ||
} | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
impl<R: Read> Iterator for ByteRleIter<R> { | ||
type Item = Result<u8>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if self.remaining == 0 { | ||
return None; | ||
} | ||
if self.used == self.num_literals { | ||
match self.read_values() { | ||
Ok(_) => {} | ||
Err(err) => return Some(Err(err)), | ||
} | ||
} | ||
|
||
let result = if self.repeat { | ||
self.literals[0] | ||
} else { | ||
self.literals[self.used] | ||
}; | ||
self.used += 1; | ||
self.remaining -= 1; | ||
Some(Ok(result)) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn reader_test() {
        /// Decodes `length` values from `encoded`, panicking on any error.
        fn decode(encoded: &[u8], length: usize) -> Vec<u8> {
            let mut input = encoded;
            ByteRleIter::new(&mut input, length)
                .collect::<Result<Vec<_>>>()
                .unwrap()
        }

        // Repeat run: header 0x61 => 0x61 + 3 = 100 copies of 0x00.
        assert_eq!(decode(&[0x61, 0x00], 100), vec![0; 100]);

        // Repeat run: header 0x01 => 4 copies of 0x01.
        assert_eq!(decode(&[0x01, 0x01], 4), vec![1; 4]);

        // Literal run: header 0xfe => 2 literal bytes follow.
        assert_eq!(decode(&[0xfe, 0x44, 0x45], 2), vec![0x44, 0x45]);
    }
}
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters