DB(s) Benchmark¶
For benches we are using the following tool: H2OAI Bench
Prerequisites¶
Ubuntu:
Run R and then in a R console type:
Exit from R and type in a terminal:
Group By behchmark¶
Dataset: G1_1e7_1e2_0_0.csv (10 million rows)
DuckDB (multithread turned on)¶
.timer "on"
Load CSV: create table t as SELECT * FROM read_csv('./db-benchmark/G1_1e7_1e2_0_0.csv');
- Q1:
select id1, sum(v1) AS v1 from t group by id1; --> 63ms - Q2:
select id1, id2, sum(v1) AS v1 from t group by id1, id2; --> 153ms - Q3:
select id3, sum(v1) AS v1, mean(v3) AS v3 from t group by id3; --> 360ms - Q4:
select id4, mean(v1) AS v1, mean(v2) AS v2, mean(v3) AS v3 from t group by id4; --> 23ms - Q5:
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6; --> 322ms - Q6:
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3; --> 330ms - Q7:
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6; --> 878ms
DuckDB (multithread turned off)¶
.timer "on"
SET threads = 1;
Load CSV: create table t as SELECT * FROM read_csv('./db-benchmark/G1_1e7_1e2_0_0.csv');
- Q1:
select id1, sum(v1) AS v1 from t group by id1; --> 347ms - Q2:
select id1, id2, sum(v1) AS v1 from t group by id1, id2; --> 690ms - Q3:
select id3, sum(v1) AS v1, mean(v3) AS v3 from t group by id3; --> 601ms - Q4:
select id4, mean(v1) AS v1, mean(v2) AS v2, mean(v3) AS v3 from t group by id4; --> 108ms - Q5:
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6; --> 440ms - Q6:
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3; --> 528ms - Q7:
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6; --> 3269ms
ClickHouse¶
Load CSV: CREATE TABLE t (id1 String, id2 String, id3 String, id4 Int64, id5 Int64, id6 Int64, v1 Int64, v2 Int64, v3 Float64) ENGINE = Memory;
clickhouse-client -q "INSERT INTO default.t FORMAT CSV" < ./db-benchmark/G1_1e7_1e2_0_0.csv
- Q1:
select id1, sum(v1) AS v1 from t group by id1; --> 51ms - Q2:
select id1, id2, sum(v1) AS v1 from t group by id1, id2; --> 189ms - Q3:
select id3, sum(v1) AS v1, avg(v3) AS v3 from t group by id3; --> 235ms - Q4:
select id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 from t group by id4; --> 47ms - Q5:
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6; --> 265ms - Q6:
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3; --> 249ms - Q7:
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6; --> 1732ms
? (4.0)¶
Load CSV: t: ("SSSJJJJJF";enlist",") 0: hsym `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
- Q1:
\t select v1: sum v1 by id1 from t --> 59ms - Q2:
\t select v1: sum v1 by id1, id2 from t --> 143ms - Q3:
\t select v1: sum v1, v3: avg v3 by id3 from t --> 166ms - Q4:
\t select v1: avg v1, v2: avg v2, v3: avg v3 by id4 from t --> 99ms - Q5:
\t select v1: sum v1, v2: sum v2, v3: sum v3 by id6 from t --> 156ms - Q6:
\t select range_v1_v2: (max v1) - (min v2) by id3 from t --> 551ms - Q7:
\t select v3: sum v3, cnt: count i by id1, id2, id3, id4, id5, id6 from t --> 4497ms
Rayforce¶
Load CSV: (set t (csv [Symbol Symbol Symbol I64 I64 I64 I64 I64 F64] "./db-benchmark/G1_1e7_1e2_0_0.csv"))
- Q1:
(timeit (select {v1: (sum v1) from: t by: id1})) --> 60ms - Q2:
(timeit (select {v1: (sum v1) from: t by: {id1: id1 id2: id2}})) --> 74ms - Q3:
(timeit (select {v1: (sum v1) v3: (avg v3) from: t by: id3})) --> 118ms - Q4:
(timeit (select {v1: (avg v1) v2: (avg v2) v3: (avg v3) from: t by: id4})) --> 72ms - Q5:
(timeit (select {v1: (sum v1) v2: (sum v2) v3: (sum v3) from: t by: id6})) --> 122ms - Q6:
(timeit (select {range_v1_v2: (- (max v1) (min v2)) from: t by: id3})) --> 104ms - Q7:
(timeit (select {v3: (sum v3) count: (map count v3) from: t by: {id1: id1 id2: id2 id3: id3 id4: id4 id5: id5 id6: id6}})) --> 1394ms
ThePlatform¶
Load CSV: t: ("SSSJJJJJF";enlist",") 0: `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
- Q1:
\t 0N#.?[t;();`id1!`id1;`v1!(sum;`v1)] --> 213ms - Q2:
\t 0N#.?[t;();`id1`id2!`id1`id2;`v1!(sum;`v1)] --> 723ms - Q3:
\t 0N#.?[t;();`id3!`id3;`v2`v3!((sum;`v2);(avg;`v3))] --> 507ms - Q4:
\t 0N#.?[t;();`id4!`id4;`v1`v2`v3!((avg;`v1);(avg;`v2);(avg;`v3))] --> 285ms - Q5:
\t 0N#.?[t;();`id6!`id6;`v1`v2`v3!((sum;`v1);(sum;`v2);(sum;`v3))] --> 488ms - Q6:
\t 0N#.?[?[t;();`id3!`id3;`v1`v2!((max;`v1);(min;`v2))];();0b;`id3`range_v1_v2!(`id3;(-;`v1;`v2))] --> 465ms - Q7:
\t 0N#.?[t;();`id1`id2`id3`id4`id5`id6!`id1`id2`id3`id4`id5`id6;`v3`count!((sum;`v3);(count;`v3))] --> 15712ms
Group By Results¶
| DB | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 |
|---|---|---|---|---|---|---|---|
| DuckDB (multithread turned on) | 63 | 153 | 360 | 23 | 322 | 330 | 878 |
| DuckDB (multithread turned off) | 347 | 690 | 601 | 108 | 440 | 528 | 3269 |
| ClickHouse | 51 | 189 | 235 | 47 | 265 | 249 | 1732 |
| ? (4.0) | 59 | 143 | 166 | 99 | 156 | 551 | 4497 |
| Rayforce | 60 | 74 | 118 | 72 | 122 | 104 | 1394 |
| ThePlatform | 213 | 723 | 507 | 285 | 488 | 465 | 15712 |
Join Benchmark¶
DuckDB (multithread turned on)¶
.timer "on"
Load CSV:
- x:
create table x as SELECT * FROM read_csv('./db-benchmark/J1_1e7_NA_0_0.csv'); - y:
create table y as SELECT * FROM read_csv('./db-benchmark/J1_1e7_1e7_0_0.csv');
Queries:
- Q1:
select * from x left join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM - Q2:
select * from x inner join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM
DuckDB (multithread turned off)¶
.timer "on"
SET threads = 1;
Queries:
- Q1:
select * from x left join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM - Q2:
select * from x inner join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM
ClickHouse¶
Load CSV:
CREATE TABLE x (id1 Int64, id2 Int64, id3 Int64, id4 String, id5 String, id6 String, v1 Float64) ENGINE = Memory;CREATE TABLE y (id1 Int64, id2 Int64, id3 Int64, id4 String, id5 String, id6 String, v2 Float64) ENGINE = Memory;
clickhouse-client -q "INSERT INTO default.x FORMAT CSV" < ./db-benchmark/J1_1e7_NA_0_0.csv clickhouse-client -q "INSERT INTO default.y FORMAT CSV" < ./db-benchmark/J1_1e7_1e7_0_0.csv
Queries:
- Q1:
select * from x left join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM - Q2:
select * from x inner join y on x.id1 = y.id1 and x.id2 = y.id2; --> OOM
? (4.0)¶
Load CSV:
- x:
("JJJSSSF";enlist",") 0: `$":./db-benchmark/J1_1e7_NA_0_0.csv" - y:
("JJJSSSF";enlist",") 0: `$":./db-benchmark/J1_1e7_1e7_0_0.csv"
Queries:
- Q1:
\t x lj 2!y --> 3174ms - Q2:
\t x ij 2!y --> 3098ms
Rayforce¶
Load CSV:
(set x (csv [I64 I64 I64 Symbol Symbol Symbol F64] "./db-benchmark/J1_1e7_NA_0_0.csv"))(set y (csv [I64 I64 I64 Symbol Symbol Symbol F64] "./db-benchmark/J1_1e7_1e7_0_0.csv"))
Queries:
- Q1:
(timeit (lj [id1 id2] x y)) --> 3149ms - Q2:
(timeit (ij [id1 id2] x y)) --> 1610ms
ThePlatform¶
Load CSV:
- x:
("JJJSSSF";enlist",") 0: `$":./db-benchmark/J1_1e7_NA_0_0.csv" - y:
("JJJSSSF";enlist",") 0: `$":./db-benchmark/J1_1e7_1e7_0_0.csv"
Queries:
-
Q1:
\t 0N#.(?[lj[(`x;x);(`y;y);((~`x`id1;~`x`id2);(~`y`id1;~`y`id2))]; (); 0b; `a`b`c`d`e`f`g`h!(~`y`id1;~`y`id2;`y`id3;~`y`id4;~`y`id5;~`y`id6;~`x`v1;~`y`v2)]) --> 23987ms -
Q2:
\t 0N#.(?[ij[(`x;x);(`y;y);((~`x`id1;~`x`id2);(~`y`id1;~`y`id2))]; (); 0b; `a`b`c`d`e`f`g`h!(~`y`id1;~`y`id2;`y`id3;~`y`id4;~`y`id5;~`y`id6;~`x`v1;~`y`v2)]) --> 34104ms
Join Results¶
| DB | Q1 | Q2 |
|---|---|---|
| DuckDB (multithread turned on) | OOM | OOM |
| DuckDB (multithread turned off) | OOM | OOM |
| ClickHouse | OOM | OOM |
| ? (4.0) | 3174 | 3098 |
| Rayforce | 3149 | 1610 |
| ThePlatform | 23987 | 34104 |
Window Join Benchmark¶
? (4.0)¶

Rayforce¶

Window Join Results¶
| DB | Q1 |
|---|---|
| ? (4.0) | ~33 min |
| Rayforce | 59145.60 ms |