Skip to content

Commit

Permalink
Merge pull request #303 from adam-szymanski/main
Browse files: browse the repository at this point in the history
  • Loading branch information
rschu1ze authored Feb 9, 2025
2 parents 7bf3933 + d1ffa19 commit b58abd1
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 65 deletions.
16 changes: 6 additions & 10 deletions oxla/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
#!/bin/bash -e

# docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
sudo apt install -y docker-ce
sudo apt install docker.io

# base
sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
Expand All @@ -15,26 +12,25 @@ echo "Download dataset."
wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
echo "Unpack dataset."
gzip -d hits.csv.gz
chmod 777 ~ hits.csv
mkdir data
mv hits.csv ~/data
mv hits.csv data

# get and configure Oxla image
echo "Install and run Oxla."

sudo docker run --rm -p 5432:5432 -v ~/data:/data --name oxlacontainer public.ecr.aws/oxla/release:1.20.0-beta > /dev/null 2>&1 &
sudo docker run --rm -p 5432:5432 -v data:/data --name oxlacontainer public.ecr.aws/oxla/release:1.53.0-beta > /dev/null 2>&1 &
sleep 30 # waiting for container start and db initialisation (leader election, etc.)

# create table and ingest data
export PGCLIENTENCODING=UTF8

psql -h localhost -t < create.sql
PGPASSWORD=oxla psql -h localhost -U oxla -t < create.sql
echo "Insert data."
psql -h localhost -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"
PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "COPY hits FROM '/data/hits.csv';"

# get ingested data size
echo "data size after ingest:"
psql -h localhost -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"
PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "SELECT pg_total_relation_size('hits');"

# wait for merges to finish
sleep 60
Expand Down
20 changes: 10 additions & 10 deletions oxla/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@ SELECT COUNT(*) FROM hits;
SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0;
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
SELECT AVG(UserID) FROM hits;
SELECT COUNT(*) FROM (SELECT UserId FROM hits GROUP BY UserId);
SELECT COUNT(*) FROM (SELECT SearchPhrase FROM hits GROUP BY SearchPhrase);
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
SELECT RegionID, COUNT(*) AS u FROM (SELECT RegionID, UserID FROM hits GROUP BY RegionID, UserID) GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, SUM(AdvEngineIDSum), SUM(c) AS c, SUM(ResolutionWidthSum) / SUM(c), COUNT(*) FROM (SELECT SUM(AdvEngineID) AS AdvEngineIDSum, SUM(ResolutionWidth) AS ResolutionWidthSum, COUNT(*) AS c, RegionId, UserID FROM hits GROUP BY RegionID, UserID) GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(*) AS u FROM (SELECT MobilePhoneModel, UserID FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel, UserID) GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhoneModel, MobilePhone, COUNT(*) AS u FROM (SELECT MobilePhoneModel, MobilePhone, UserID FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel, MobilePhone, UserID) GROUP BY MobilePhoneModel, MobilePhone ORDER BY u DESC LIMIT 10;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(*) AS u FROM (SELECT SearchPhrase, UserID FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase, UserID) GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
SELECT NULL;
SELECT NULL;
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT NULL;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Expand Down
88 changes: 44 additions & 44 deletions oxla/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"system": "Oxla",
"date": "2024-04-09",
"date": "2025-01-23",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "Ingests data only from non-compressed csv.",
Expand All @@ -11,48 +11,48 @@
"data_size": 17394972923,

"result": [
[3.112892,0.068225,0.049215],
[1.476993,0.068502,0.01352],
[1.532504,0.015794,0.01899],
[1.541791,0.043208,0.090244],
[1.424205,1.114138,1.079011],
[1.546764,1.340306,1.339957],
[0.202873,0.009787,0.008214],
[1.017122,0.01247,0.010861],
[1.790766,1.816432,1.681662],
[2.06058,2.051205,2.05747],
[0.166164,0.149605,0.147586],
[0.33821,0.15334,0.15212],
[0.968408,0.975795,0.932127],
[1.641231,1.648973,1.691530],
[1.039926,1.021776,1.015062],
[1.059569,1.038191,1.016849],
[2.930077,2.780725,2.786122],
[2.7766,2.745188,2.827054],
[5.474963,5.455883,5.462812],
[0.069049,0.037876,0.030425],
[5.294758,2.818725,2.803313],
[null,null,null],
[null,null,null],
[21.034479,18.253271,6.146486],
[0.17394,0.151798,0.146398],
[0.180155,0.170271,0.177003],
[0.22494,0.216158,0.216051],
[0.978861,0.973059,0.964485],
[null,null,null],
[0.030928,0.02037,0.020366],
[0.408601,0.412485,0.408602],
[0.875709,0.743332,0.704842],
[7.962516,7.867736,7.594272],
[6.209667,5.892066,5.963681],
[5.931634,5.947336,6.005506],
[0.577314,0.583573,0.545736],
[0.126127,0.090768,0.094307],
[0.110712,0.04149,0.039939],
[0.060824,0.043637,0.030213],
[0.322545,0.204934,0.185178],
[0.121207,0.011082,0.011699],
[0.069138,0.012728,0.014108],
[0.030538,0.028048,0.030625]
[0.046851,0.02652,0.02648],
[0.129717,0.010132,0.010436],
[0.024405,0.017653,0.015578],
[1.038988,0.026278,0.025259],
[0.920077,0.867753,0.858069],
[1.193297,1.094777,1.088436],
[0.024024,0.015385,0.014366],
[0.021601,0.015406,0.013076],
[1.523167,1.323168,1.30051],
[1.906564,1.755886,1.844895],
[0.190242,0.1447,0.136949],
[0.221494,0.159692,0.160614],
[0.836624,0.8148,0.813433],
[1.664004,1.635831,1.621079],
[0.887504,0.885837,0.875387],
[1.070728,0.938593,1.021919],
[2.685652,2.644121,2.647854],
[2.667017,2.658392,2.628829],
[4.65067,4.626599,4.701596],
[0.105851,0.080334,0.05596],
[0.722496,0.705024,0.738724],
[0.765263,0.767057,0.739108],
[1.877141,1.390639,1.434695],
[16.211858,3.861157,3.897472],
[0.112136,0.099527,0.097523],
[0.121357,0.111725,0.112716],
[0.157924,0.150043,0.148599],
[0.698874,0.674169,0.685363],
[51.293703,51.584485,51.114784],
[0.114097,0.099482,0.102921],
[0.437985,0.415509,0.408477],
[0.720325,0.674251,0.681977],
[89.73327,81.154516,76.413726],
[7.202697,6.053296,6.002008],
[5.97555,5.824706,6.135259],
[0.68644,0.655987,0.6812],
[0.064488,0.06473,0.062154],
[0.023134,0.029644,0.023701],
[0.039028,0.05021,0.04079],
[0.152121,0.144167,0.132704],
[0.01652,0.012679,0.009873],
[0.018828,0.030098,0.021353],
[0.017781,0.017341,0.015782]
]
}
2 changes: 1 addition & 1 deletion oxla/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cat queries.sql | while read -r query; do
else
results+="["
for i in $(seq 1 $TRIES); do
time=$(psql -h localhost -t -c '\timing' -c "$query" | grep 'Time' | perl -nle 'm/Time: ([^ ]*) ms/; print $1 / 1000')
time=$(PGPASSWORD=oxla psql -h localhost -U oxla -t -c '\timing' -c "$query" | grep 'Time' | perl -nle 'm/Time: ([^ ]*) ms/; print $1 / 1000')
echo "$time s"
results+="$time,"
done
Expand Down

0 comments on commit b58abd1

Please sign in to comment.