Group By Benchmark¶
| DB | Q1 | Q2 | Q3 | Q4 | Q5 | Q6 | Q7 |
|---|---|---|---|---|---|---|---|
| Rayforce | 60 | 74 | 118 | 72 | 122 | 104 | 1394 |
| ClickHouse | 51 | 189 | 235 | 47 | 265 | 249 | 1732 |
| ? (4.0) | 59 | 143 | 166 | 99 | 156 | 551 | 4497 |
| ThePlatform | 213 | 723 | 507 | 285 | 488 | 465 | 15712 |
| DuckDB (multithread turned on) | 63 | 153 | 360 | 23 | 322 | 330 | 878 |
| DuckDB (multithread turned off) | 347 | 690 | 601 | 108 | 440 | 528 | 3269 |
Dataset: G1_1e7_1e2_0_0.csv (10 million rows). Lower values (ms) are better.
RayforceDB demonstrates competitive performance across all 7 group by queries, consistently ranking among the top performers:
- Q1-Q2: Rayforce (60-74ms) performs comparably to ClickHouse (51-189ms) and ? (4.0) (59-143ms)
- Q3: At 118ms, Rayforce is 2.0x faster than DuckDB MT-off (601ms) and 2.0x faster than ? (4.0) (166ms)
- Q4: Rayforce (72ms) is competitive, though ClickHouse leads at 47ms
- Q5-Q6: Rayforce (122-104ms) shows strong performance, 2.1x faster than DuckDB MT-off on Q5 and 5.3x faster than ? (4.0) on Q6
- Q7: Rayforce (1394ms) is 2.3x faster than ? (4.0) (4497ms) and 11.3x faster than ThePlatform (15712ms)
Rayforce Queries¶
(set t (csv [Symbol Symbol Symbol I64 I64 I64 I64 I64 F64] "./db-benchmark/G1_1e7_1e2_0_0.csv"))
(timeit (select {v1: (sum v1) from: t by: id1})) ;; Q1
(timeit (select {v1: (sum v1) from: t by: {id1: id1 id2: id2}})) ;; Q2
(timeit (select {v1: (sum v1) v3: (avg v3) from: t by: id3})) ;; Q3
(timeit (select {v1: (avg v1) v2: (avg v2) v3: (avg v3) from: t by: id4})) ;; Q4
(timeit (select {v1: (sum v1) v2: (sum v2) v3: (sum v3) from: t by: id6})) ;; Q5
(timeit (select {range_v1_v2: (- (max v1) (min v2)) from: t by: id3})) ;; Q6
(timeit (select {v3: (sum v3) count: (map count v3) from: t by: {id1: id1 id2: id2 id3: id3 id4: id4 id5: id5 id6: id6}})) ;; Q7
DuckDB Queries¶
.timer "on"
create table t as SELECT * FROM read_csv('./db-benchmark/G1_1e7_1e2_0_0.csv');
select id1, sum(v1) AS v1 from t group by id1; # Q1
select id1, id2, sum(v1) AS v1 from t group by id1, id2; # Q2
select id3, sum(v1) AS v1, mean(v3) AS v3 from t group by id3; # Q3
select id4, mean(v1) AS v1, mean(v2) AS v2, mean(v3) AS v3 from t group by id4; # Q4
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6; # Q5
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3; # Q6
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6; # Q7
ClickHouse Queries¶
CREATE TABLE t (id1 String, id2 String, id3 String, id4 Int64, id5 Int64, id6 Int64, v1 Int64, v2 Int64, v3 Float64) ENGINE = Memory;
select id1, sum(v1) AS v1 from t group by id1; # Q1
select id1, id2, sum(v1) AS v1 from t group by id1, id2; # Q2
select id3, sum(v1) AS v1, avg(v3) AS v3 from t group by id3; # Q3
select id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 from t group by id4; # Q4
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6; # Q5
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3; # Q6
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6; # Q7
? (4.0) Queries¶
t: ("SSSJJJJJF";enlist",") 0: hsym `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
select v1: sum v1 by id1 from t // Q1
select v1: sum v1 by id1, id2 from t // Q2
select v1: sum v1, v3: avg v3 by id3 from t // Q3
select v1: avg v1, v2: avg v2, v3: avg v3 by id4 from t // Q4
select v1: sum v1, v2: sum v2, v3: sum v3 by id6 from t // Q5
select range_v1_v2: (max v1) - (min v2) by id3 from t // Q6
select v3: sum v3, cnt: count i by id1, id2, id3, id4, id5, id6 from t // Q7
ThePlatform Queries¶
t: ("SSSJJJJJF";enlist",") 0: `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
0N#.?[t;();`id1!`id1;`v1!(sum;`v1)]
0N#.?[t;();`id1`id2!`id1`id2;`v1!(sum;`v1)]
0N#.?[t;();`id3!`id3;`v2`v3!((sum;`v2);(avg;`v3))]
0N#.?[t;();`id4!`id4;`v1`v2`v3!((avg;`v1);(avg;`v2);(avg;`v3))]
0N#.?[t;();`id6!`id6;`v1`v2`v3!((sum;`v1);(sum;`v2);(sum;`v3))]
0N#.?[?[t;();`id3!`id3;`v1`v2!((max;`v1);(min;`v2))];();0b;`id3`range_v1_v2!(`id3;(-;`v1;`v2))]
0N#.?[t;();`id1`id2`id3`id4`id5`id6!`id1`id2`id3`id4`id5`id6;`v3`count!((sum;`v3);(count;`v3))]