Skip to content

Commit

Permalink
Merge pull request #302 from Mytherin/duckdb-v1.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
rschu1ze authored Feb 7, 2025
2 parents bcf821e + 9da39ad commit 548ad46
Show file tree
Hide file tree
Showing 15 changed files with 250 additions and 280 deletions.
1 change: 0 additions & 1 deletion duckdb-memory/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmen
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'

/usr/bin/time -v ./memory.py

23 changes: 16 additions & 7 deletions duckdb-parquet/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
#!/bin/bash
# Benchmark driver for the partitioned-parquet DuckDB setup: installs build
# tools, builds DuckDB from source, downloads the parquet partitions, runs
# create.sql, runs the queries via ./run.sh and prints the timings as
# bracketed rows of three values.
# NOTE(review): this text is a rendered diff hunk (header reports 16 additions
# & 7 deletions), so removed and added lines appear together without +/-
# markers -- e.g. the pip install of duckdb==1.1.3 next to the source build,
# ./load.py next to the duckdb CLI call, and two log-parsing pipelines.
# Confirm against the repository which lines are current before running.

# Install

sudo apt-get update
sudo apt-get install -y python3-pip
pip install --break-system-packages duckdb==1.1.3 psutil
# Toolchain needed to build DuckDB from source.
sudo apt-get install ninja-build cmake build-essential make ccache pip clang -y

# Build the v1.2-histrionicus checkout with clang. GEN, NATIVE_ARCH and LTO
# are passed to DuckDB's Makefile through the environment (presumably: Ninja
# generator, native-CPU tuning, thin LTO -- confirm against DuckDB build docs).
export CC=clang
export CXX=clang++
git clone https://github.com/duckdb/duckdb
cd duckdb
git checkout v1.2-histrionicus
GEN=ninja NATIVE_ARCH=1 LTO=thin make
# Append build/release/ of the checkout to PATH so the `duckdb` command used
# below can be found (presumably that is where make places the CLI binary).
export PATH="$PATH:`pwd`/build/release/"
cd ..

# Load the data
# Fetch hits_0.parquet .. hits_99.parquet with up to 100 wget processes in
# parallel; --continue resumes partially downloaded files.
seq 0 99 | xargs -P100 -I{} bash -c 'wget --no-verbose --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'

./load.py
# Execute create.sql against hits.db with the duckdb CLI and time it.
time duckdb hits.db -f create.sql

# Run the queries

# Keep stdout and stderr of the run in log.txt while still printing them.
./run.sh 2>&1 | tee log.txt

# Database file size in bytes.
wc -c my-db.duckdb
wc -c hits.db

# Turn log.txt into rows of three values: keep lines that start with a digit
# or mention Killed/Segmentation, expand a Killed/Segmentation line into three
# "null" lines, then let awk wrap every three lines as "[a,b,c],".
cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
# Same row formatting, but additionally keeps "Run Time (s): real <n> ..."
# lines and reduces each of them to the number <n> before grouping.
cat log.txt |
grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' |
sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
8 changes: 4 additions & 4 deletions duckdb-parquet/create.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-- Defines the view `hits` over all files matching hits_*.parquet, replacing
-- selected columns with converted values.
-- NOTE(review): rendered diff hunk (4 additions & 4 deletions) -- the
-- multi-line REPLACE (epoch_ms / DATE + INTERVAL) with its FROM line and the
-- single-line REPLACE (make_date) with its FROM line both appear below; only
-- one variant belongs in the real file. Confirm against the repository.
CREATE VIEW hits AS
SELECT *
REPLACE
(epoch_ms(EventTime * 1000) AS EventTime,
DATE '1970-01-01' + INTERVAL (EventDate) DAYS AS EventDate)
FROM read_parquet('hits_*.parquet', binary_as_string=True);
REPLACE (make_date(EventDate) AS EventDate)
FROM read_parquet('hits_*.parquet', binary_as_string=True);

-- Helper macro: toDateTime(t) evaluates epoch_ms(t * 1000)
-- (presumably t is seconds since the Unix epoch -- confirm).
CREATE MACRO toDateTime(t) AS epoch_ms(t * 1000);
14 changes: 0 additions & 14 deletions duckdb-parquet/load.py

This file was deleted.

4 changes: 2 additions & 2 deletions duckdb-parquet/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, extract(minute FROM toDateTime(EventTime)) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Expand All @@ -40,4 +40,4 @@ SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000;
SELECT DATE_TRUNC('minute', toDateTime(EventTime)) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', toDateTime(EventTime)) ORDER BY DATE_TRUNC('minute', toDateTime(EventTime)) LIMIT 10 OFFSET 1000;
17 changes: 0 additions & 17 deletions duckdb-parquet/query.py

This file was deleted.

88 changes: 44 additions & 44 deletions duckdb-parquet/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"system": "DuckDB (Parquet, partitioned)",
"date": "2024-11-27",
"date": "2025-02-05",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "",
Expand All @@ -11,48 +11,48 @@
"data_size": 14737666736,

"result": [
[0.11052128000000039,0.07510095800000727,0.07482720299998391],
[0.12390973099999769,0.08799121000001264,0.08659421100000486],
[0.1802913619999913,0.11165366699998458,0.11256087000001003],
[0.44857502400000726,0.11868003600000065,0.11416359200001125],
[1.2294751780000013,0.42031310699999835,0.41501899200000025],
[1.0619799039999975,0.704478875999996,0.6945510980000051],
[0.463516052000017,0.4205021790000103,0.42350347199999305],
[0.14255578700002047,0.09078143099998215,0.091366378999993],
[0.9319725910000045,0.5142498320000186,0.5172727039999927],
[1.3762876150000238,0.7246325790000014,0.7255065909999985],
[0.5583005390000153,0.19166810400000145,0.1881575449999957],
[1.1524824470000112,0.22536062500000753,0.22139205199999878],
[1.4437344370000176,0.5995674480000162,0.5936260590000018],
[2.805760178000014,0.9231901580000113,0.9175548540000023],
[1.1569440859999816,0.651196504000012,0.6422786660000099],
[0.6690660820000005,0.47228665700001216,0.4744386749999876],
[2.791790689999999,1.121874450000007,1.1247018900000114],
[2.7981470900000147,1.125944488000016,1.141634646],
[5.138499918000008,1.9534744589999775,1.9856906880000054],
[0.20532803100002184,0.10882481699999857,0.11244283699997482],
[10.62995572700001,1.844938342000006,1.8348558930000252],
[11.701504103000019,1.7005332410000165,1.7053109770000106],
[22.958297070000015,3.5526050710000163,3.555595224000001],
[58.88756602899997,10.639712609000014,10.690652277000027],
[2.99689328300002,0.4794005240000274,0.483271789000014],
[0.9076006049999705,0.36497371699999803,0.35227883999999676],
[2.994342438999979,0.4895540190000247,0.479844522999997],
[9.353536024999983,1.4645006279999961,1.4759320379999963],
[10.568858570999964,10.158934629999976,10.11379384899999],
[4.335202347000006,4.255150033000007,4.283988227999998],
[2.25947083799997,0.7053096309999773,0.7094401290000292],
[5.932542424000019,0.8128257209999674,0.8173750340000083],
[5.391661251000016,2.37234533000003,2.3208716000000322],
[9.879454598000052,2.6484584210000435,2.640947945999983],
[9.876968317999967,2.6066815840000004,2.639813798999967],
[0.7822432080000112,0.6648381679999602,0.6626834680000115],
[0.24420177100000728,0.14329177199999776,0.15229572799995594],
[0.15579374700001836,0.11827062200001137,0.10689886399995885],
[0.1992192829999908,0.10196714500000326,0.10160259900004576],
[0.4412357339999744,0.2678184920000035,0.26373995500000547],
[0.12147571700000981,0.08117302400000881,0.08039329399997541],
[0.1181243000000336,0.08039320299997144,0.07783418700000766],
[0.11289640699999381,0.07863085899998623,0.07738770099996373]
[0.164,0.085,0.082],
[0.119,0.060,0.061],
[0.192,0.088,0.086],
[0.359,0.081,0.079],
[1.178,0.353,0.363],
[0.828,0.460,0.457],
[0.113,0.061,0.061],
[0.127,0.063,0.060],
[0.717,0.465,0.465],
[1.059,0.596,0.578],
[0.443,0.151,0.148],
[1.016,0.185,0.182],
[1.136,0.490,0.476],
[2.430,0.919,0.880],
[0.930,0.540,0.535],
[0.564,0.420,0.420],
[2.350,1.021,1.021],
[2.122,0.791,0.789],
[4.587,1.930,1.950],
[0.251,0.074,0.073],
[9.957,1.197,1.194],
[10.999,0.684,0.668],
[21.527,1.392,1.410],
[33.496,5.369,5.336],
[0.196,0.094,0.094],
[0.975,0.248,0.253],
[0.178,0.093,0.092],
[9.984,0.896,0.889],
[9.955,9.601,9.455],
[0.160,0.073,0.074],
[2.248,0.578,0.571],
[5.922,0.682,0.676],
[5.447,2.040,2.056],
[9.888,2.372,2.386],
[9.854,2.403,2.458],
[0.746,0.587,0.590],
[0.209,0.111,0.111],
[0.155,0.088,0.087],
[0.150,0.057,0.055],
[0.394,0.219,0.217],
[0.115,0.053,0.050],
[0.116,0.056,0.055],
[0.101,0.051,0.047]
]
}
88 changes: 44 additions & 44 deletions duckdb-parquet/results/c6a.metal.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"system": "DuckDB (Parquet, partitioned)",
"date": "2024-11-27",
"date": "2025-02-05",
"machine": "c6a.metal, 500gb gp2",
"cluster_size": 1,
"comment": "",
Expand All @@ -11,48 +11,48 @@
"data_size": 14737666736,

"result": [
[0.22064165999836405,0.10886333600137732,0.10035635300300783],
[0.2090159239996865,0.10079622400007793,0.08587361900208634],
[0.24158671799887088,0.10498254399863072,0.11356378700293135],
[0.39156127000023844,0.1437904560007155,0.11517444600031013],
[0.9492011050024303,0.23534582900174428,0.24136485300186905],
[0.9057871529985277,0.30914358100199024,0.23769009399984498],
[0.24534914900141302,0.1509247879985196,0.162418247997266],
[0.23070556400125497,0.11584803399819066,0.11269584900219343],
[0.7073371700025746,0.26606066500244197,0.22921901900190278],
[1.0556358089997957,0.295529817998613,0.26568887699977495],
[0.4892407519982953,0.1328769840001769,0.16416157899948303],
[0.8916413730003114,0.16927299000235507,0.15134500799831585],
[1.1896540300003835,0.28654627699870616,0.27039848700223956],
[2.5535861489988747,0.4687831190021825,0.42421876300068107],
[0.9975665820020367,0.362422553000215,0.2820111139990331],
[0.5269063039995672,0.39250073700168286,0.29078964099971927],
[2.2770834360017034,0.8704268709989265,0.7373391680011991],
[2.285241853998741,0.9774307129991939,0.7003227230015909],
[4.292322358000092,0.9742372030013939,0.6998918900026183],
[0.2506510780003737,0.11718380399906891,0.1293006730011257],
[9.77101984500041,0.527151017999131,0.431143694997445],
[10.999202312999842,0.3956347899984394,0.3579404030024307],
[21.5540906310016,0.6871144640026614,0.5319933459977619],
[55.29203715499898,1.9404848179983674,1.7484993639991444],
[2.612326053000288,0.1912991279968992,0.1776238040001772],
[0.918034816000727,0.19911728600345668,0.17915523299961933],
[2.8879784470009326,0.21927593299915316,0.20239478800067445],
[9.361416278999968,0.46229383399986546,0.409067109998432],
[8.779426084001898,2.899777353999525,3.370570749997569],
[0.7902931309981795,0.5913719390009646,0.5893235910007206],
[2.2267992609995417,0.2571639199995843,0.24316665399965132],
[5.867168218999723,0.4684128890003194,0.4540498100031982],
[4.697866193000664,0.9545511869982874,0.7677795919989876],
[9.78189673399902,1.4304991219978547,1.0890957099982188],
[9.900697042001411,1.1249232040026982,1.0547837240010267],
[0.6744621270008793,0.30249466600071173,0.2852872900002694],
[0.26121385999795166,0.16722514600041904,0.18514311000035377],
[0.21776700200280175,0.11190589800025919,0.1330010960009531],
[0.2297739960013132,0.11809357400125009,0.1213479809994169],
[0.46232909700120217,0.28833456100255717,0.27430850900054793],
[0.2237862940019113,0.1302182739964337,0.10985613000229932],
[0.22377537700231187,0.10815454900148325,0.11889209400032996],
[0.2163319399987813,0.11454355099704117,0.1296722500010219]
[0.277,0.140,0.149],
[0.129,0.072,0.068],
[0.136,0.081,0.081],
[0.329,0.080,0.065],
[0.950,0.145,0.149],
[0.829,0.200,0.199],
[0.124,0.063,0.068],
[0.149,0.092,0.078],
[0.716,0.167,0.161],
[1.077,0.188,0.177],
[0.417,0.097,0.092],
[0.807,0.103,0.100],
[1.155,0.240,0.226],
[2.313,0.407,0.340],
[0.886,0.225,0.214],
[0.492,0.172,0.186],
[2.139,0.337,0.304],
[2.103,0.282,0.281],
[4.014,0.466,0.433],
[0.177,0.070,0.060],
[9.792,0.331,0.302],
[10.963,0.216,0.191],
[21.466,0.375,0.320],
[48.898,1.545,1.231],
[0.183,0.097,0.084],
[0.890,0.110,0.094],
[0.443,0.097,0.086],
[9.775,0.274,0.228],
[8.982,1.978,1.881],
[0.157,0.092,0.078],
[2.203,0.203,0.179],
[5.725,0.218,0.214],
[4.437,0.695,0.686],
[9.814,0.960,0.713],
[9.826,0.845,0.827],
[0.313,0.170,0.167],
[0.230,0.126,0.123],
[0.192,0.107,0.101],
[0.168,0.078,0.077],
[0.349,0.214,0.213],
[0.139,0.065,0.054],
[0.130,0.078,0.091],
[0.126,0.069,0.064]
]
}
19 changes: 16 additions & 3 deletions duckdb-parquet/run.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
#!/bin/bash
# Reads queries.sql one line at a time and runs each query TRIES times.
# NOTE(review): this text is a rendered diff hunk (header reports 16 additions
# & 3 deletions), so removed lines (the ./query.py call with its own `done`,
# one of the two drop_caches lines) appear next to their replacements without
# +/- markers. Confirm against the repository which lines are current.

# Number of times each query is executed.
TRIES=3

cat queries.sql | while read -r query; do
# Flush dirty pages to disk, then ask the kernel to drop its caches so the
# first execution of every query starts cold.
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null

./query.py <<< "${query}"
done
# Log the query text before running it.
echo "$query";
# Assemble the duckdb CLI arguments as an array; every "-c" is followed by one
# command. Order: set parquet_metadata_cache, turn the timer on, then the
# query itself repeated TRIES times -- all within one duckdb invocation.
cli_params=()
cli_params+=("-c")
cli_params+=("SET parquet_metadata_cache=true")
cli_params+=("-c")
cli_params+=(".timer on")
for i in $(seq 1 $TRIES); do
cli_params+=("-c")
cli_params+=("${query}")
done;
# Log the assembled arguments, then run them against hits.db.
echo "${cli_params[@]}"
duckdb hits.db "${cli_params[@]}"
done;
28 changes: 18 additions & 10 deletions duckdb/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
#!/bin/bash
# Benchmark driver for the native DuckDB setup: installs build tools, builds
# DuckDB from source, downloads and unpacks the dataset, loads it into
# hits.db, runs the queries via ./run.sh and prints the timings as bracketed
# rows of three values.
# NOTE(review): this text is a rendered diff hunk (header reports 18 additions
# & 10 deletions), so removed and added lines appear together without +/-
# markers -- e.g. hits.csv.gz next to hits.tsv.gz, ./load.py next to the
# duckdb CLI load, and "# Load the data" twice. Confirm against the
# repository which lines are current before running.

# Install

sudo apt-get update
sudo apt-get install -y python3-pip
pip install --break-system-packages duckdb==1.1.3 psutil
# Toolchain needed to build DuckDB from source.
sudo apt-get install ninja-build cmake build-essential make ccache pip clang -y

# Load the data
# Build the v1.2-histrionicus checkout with clang. GEN, NATIVE_ARCH and LTO
# are passed to DuckDB's Makefile through the environment (presumably: Ninja
# generator, native-CPU tuning, thin LTO -- confirm against DuckDB build docs).
export CC=clang
export CXX=clang++
git clone https://github.com/duckdb/duckdb
cd duckdb
git checkout v1.2-histrionicus
GEN=ninja NATIVE_ARCH=1 LTO=thin make
# Append build/release/ of the checkout to PATH so the `duckdb` command used
# below can be found (presumably that is where make places the CLI binary).
export PATH="$PATH:`pwd`/build/release/"
cd ..

wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
gzip -d hits.csv.gz
# Load the data
# Download the gzip-compressed TSV dump (resuming if partially present) and
# decompress it in place to hits.tsv.
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz

./load.py
# Run create.sql (not shown here; presumably defines the hits table), then
# COPY hits.tsv into the hits table with an empty QUOTE setting; the whole
# step is timed.
time duckdb hits.db -f create.sql -c "COPY hits FROM 'hits.tsv' (QUOTE '')"

# Run the queries

# Keep stdout and stderr of the run in log.txt while still printing them.
./run.sh 2>&1 | tee log.txt

# Database file size in bytes.
wc -c my-db.duckdb
wc -c hits.db

# Turn log.txt into rows of three values: keep lines that start with a digit
# or mention Killed/Segmentation, expand a Killed/Segmentation line into three
# "null" lines, then let awk wrap every three lines as "[a,b,c],".
cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
# Same row formatting, but additionally keeps "Run Time (s): real <n> ..."
# lines and reduces each of them to the number <n> before grouping.
cat log.txt |
grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' |
sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/; s/^Run Time \(s\): real\s*([0-9.]+).*$/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
22 changes: 0 additions & 22 deletions duckdb/load.py

This file was deleted.

Loading

0 comments on commit 548ad46

Please sign in to comment.