Skip to content

Commit 0e48877

Browse files
alamb and etseidl authored
Add Parquet arrow_reader benchmarks for {u}int{8,16} columns (#7484)
* allow for reading improperly encoded UINT_8 and UINT_16 parquet data
* add some benchmarks
* remove print
* checkpoint some experimental code
* checkpoint
* add a few more types
* modify comment
* another edit
* Revert non benchmark code

---------

Co-authored-by: Ed Seidl <[email protected]>
Co-authored-by: seidl <[email protected]>
1 parent 0785d3f commit 0e48877

File tree

1 file changed

+90
-0
lines changed

1 file changed

+90
-0
lines changed

parquet/benches/arrow_reader.rs

+90
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ fn build_test_schema() -> SchemaDescPtr {
7676
OPTIONAL FIXED_LEN_BYTE_ARRAY (8) optional_flba8_leaf;
7777
REQUIRED FIXED_LEN_BYTE_ARRAY (16) mandatory_flba16_leaf;
7878
OPTIONAL FIXED_LEN_BYTE_ARRAY (16) optional_flba16_leaf;
79+
REQUIRED INT32 mandatory_uint8_leaf (INTEGER(8, false));
80+
OPTIONAL INT32 optional_uint8_leaf (INTEGER(8, false));
81+
REQUIRED INT32 mandatory_uint16_leaf (INTEGER(16, false));
82+
OPTIONAL INT32 optional_uint16_leaf (INTEGER(16, false));
83+
REQUIRED INT32 mandatory_uint32_leaf (INTEGER(32, false));
84+
OPTIONAL INT32 optional_uint32_leaf (INTEGER(32, false));
85+
REQUIRED INT32 mandatory_int8_leaf (INTEGER(8, true));
86+
OPTIONAL INT32 optional_int8_leaf (INTEGER(8, true));
87+
REQUIRED INT32 mandatory_int16_leaf (INTEGER(16, true));
88+
OPTIONAL INT32 optional_int16_leaf (INTEGER(16, true));
89+
REQUIRED INT64 mandatory_uint64_leaf (INTEGER(64, false));
90+
OPTIONAL INT64 optional_uint64_leaf (INTEGER(64, false));
7991
}
8092
";
8193
parse_message_type(message_type)
@@ -1280,6 +1292,18 @@ fn add_benches(c: &mut Criterion) {
12801292
let string_list_desc = schema.column(14);
12811293
let mandatory_binary_column_desc = schema.column(15);
12821294
let optional_binary_column_desc = schema.column(16);
1295+
let mandatory_uint8_column_desc = schema.column(27);
1296+
let optional_uint8_column_desc = schema.column(28);
1297+
let mandatory_uint16_column_desc = schema.column(29);
1298+
let optional_uint16_column_desc = schema.column(30);
1299+
let mandatory_uint32_column_desc = schema.column(31);
1300+
let optional_uint32_column_desc = schema.column(32);
1301+
let mandatory_int8_column_desc = schema.column(33);
1302+
let optional_int8_column_desc = schema.column(34);
1303+
let mandatory_int16_column_desc = schema.column(35);
1304+
let optional_int16_column_desc = schema.column(36);
1305+
let mandatory_uint64_column_desc = schema.column(37);
1306+
let optional_uint64_column_desc = schema.column(38);
12831307

12841308
// primitive / int32 benchmarks
12851309
// =============================
@@ -1294,6 +1318,61 @@ fn add_benches(c: &mut Criterion) {
12941318
);
12951319
group.finish();
12961320

1321+
// primitive int32 / logical uint8 benchmarks
1322+
let mut group = c.benchmark_group("arrow_array_reader/UInt8Array");
1323+
bench_primitive::<Int32Type>(
1324+
&mut group,
1325+
&mandatory_uint8_column_desc,
1326+
&optional_uint8_column_desc,
1327+
0,
1328+
256,
1329+
);
1330+
group.finish();
1331+
1332+
// primitive int32 / logical int8 benchmarks
1333+
let mut group = c.benchmark_group("arrow_array_reader/Int8Array");
1334+
bench_primitive::<Int32Type>(
1335+
&mut group,
1336+
&mandatory_int8_column_desc,
1337+
&optional_int8_column_desc,
1338+
0,
1339+
128,
1340+
);
1341+
group.finish();
1342+
1343+
// primitive int32 / logical uint16 benchmarks
1344+
let mut group = c.benchmark_group("arrow_array_reader/UInt16Array");
1345+
bench_primitive::<Int32Type>(
1346+
&mut group,
1347+
&mandatory_uint16_column_desc,
1348+
&optional_uint16_column_desc,
1349+
0,
1350+
65536,
1351+
);
1352+
group.finish();
1353+
1354+
// primitive int32 / logical int16 benchmarks
1355+
let mut group = c.benchmark_group("arrow_array_reader/Int16Array");
1356+
bench_primitive::<Int32Type>(
1357+
&mut group,
1358+
&mandatory_int16_column_desc,
1359+
&optional_int16_column_desc,
1360+
0,
1361+
32768,
1362+
);
1363+
group.finish();
1364+
1365+
// primitive int32 / logical uint32 benchmarks
1366+
let mut group = c.benchmark_group("arrow_array_reader/UInt32Array");
1367+
bench_primitive::<Int32Type>(
1368+
&mut group,
1369+
&mandatory_uint32_column_desc,
1370+
&optional_uint32_column_desc,
1371+
0,
1372+
1000,
1373+
);
1374+
group.finish();
1375+
12971376
// primitive / int64 benchmarks
12981377
// =============================
12991378

@@ -1307,6 +1386,17 @@ fn add_benches(c: &mut Criterion) {
13071386
);
13081387
group.finish();
13091388

1389+
// primitive int64 / logical uint64 benchmarks
1390+
let mut group = c.benchmark_group("arrow_array_reader/UInt64Array");
1391+
bench_primitive::<Int64Type>(
1392+
&mut group,
1393+
&mandatory_uint64_column_desc,
1394+
&optional_uint64_column_desc,
1395+
0,
1396+
1000,
1397+
);
1398+
group.finish();
1399+
13101400
// string benchmarks
13111401
//==============================
13121402

0 commit comments

Comments
 (0)