Skip to content

Commit 6fc14b6

Browse files
authored
Allow reading of improperly constructed empty lists in Parquet metadata (#8827)
# Which issue does this PR close?

- Closes #8826.

# Rationale for this change

As reported in the issue, some writers use an element type of 0 for an empty list. This is not compliant with the Thrift compact protocol spec, but many readers (including this crate prior to 57.0.0) tolerate it.

# What changes are included in this PR?

Adds a special case to `read_list_begin` for a zero-length list with an element type of 0.

# Are these changes tested?

Yes.

# Are there any user-facing changes?

No, internal change only.
1 parent af2c460 commit 6fc14b6

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

parquet/src/parquet_thrift.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl From<ThriftProtocolError> for ParquetError {
6161
general_err!("Unexpected struct field type {}", value)
6262
}
6363
ThriftProtocolError::InvalidElementType(value) => {
64-
general_err!("Unexpected list/set element type{}", value)
64+
general_err!("Unexpected list/set element type {}", value)
6565
}
6666
ThriftProtocolError::FieldDeltaOverflow {
6767
field_delta,
@@ -302,6 +302,14 @@ pub(crate) trait ThriftCompactInputProtocol<'a> {
302302
/// Read the [`ListIdentifier`] for a Thrift encoded list.
303303
fn read_list_begin(&mut self) -> ThriftProtocolResult<ListIdentifier> {
304304
let header = self.read_byte()?;
305+
// some parquet writers will have an element_type of 0 for an empty list.
306+
// account for that and return a bogus but valid element_type.
307+
if header == 0 {
308+
return Ok(ListIdentifier {
309+
element_type: ElementType::Byte,
310+
size: 0,
311+
});
312+
}
305313
let element_type = ElementType::try_from(header & 0x0f)?;
306314

307315
let possible_element_count = (header & 0xF0) >> 4;
@@ -1089,4 +1097,13 @@ pub(crate) mod tests {
10891097
test_roundtrip(TimeUnit::MICROS);
10901098
test_roundtrip(TimeUnit::NANOS);
10911099
}
1100+
1101+
#[test]
1102+
fn test_decode_empty_list() {
1103+
let data = vec![0u8; 1];
1104+
let mut prot = ThriftSliceInputProtocol::new(&data);
1105+
let header = prot.read_list_begin().expect("error reading list header");
1106+
assert_eq!(header.size, 0);
1107+
assert_eq!(header.element_type, ElementType::Byte);
1108+
}
10921109
}

0 commit comments

Comments
 (0)