From 5f6b8dae58d7fb6bf1adeb8454455a327bb3fffa Mon Sep 17 00:00:00 2001
From: Jefffrey
Date: Mon, 23 Sep 2024 19:12:27 +1000
Subject: [PATCH] Fix TPCH data conversion

---
 scripts/convert_tpch.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/convert_tpch.py b/scripts/convert_tpch.py
index 23bdda0..524201e 100644
--- a/scripts/convert_tpch.py
+++ b/scripts/convert_tpch.py
@@ -113,9 +113,11 @@
 }
 
 for table in tables:
+    schema = schemas[table]
     tbl = csv.read_csv(
         f"benchmark_data/{table}.tbl",
+        read_options=csv.ReadOptions(column_names=schema.names),
         parse_options=csv.ParseOptions(delimiter="|"),
-        convert_options=csv.ConvertOptions(column_types=schemas[table]),
+        convert_options=csv.ConvertOptions(column_types=schema),
     )
-    orc.write_table(tbl, f"benchmark_data/{table}.orc", compression="zstd")
+    orc.write_table(tbl, f"benchmark_data/{table}.orc")