Skip to content

Group By Benchmark

DB Q1 Q2 Q3 Q4 Q5 Q6 Q7
Rayforce 60 74 118 72 122 104 1394
ClickHouse 51 189 235 47 265 249 1732
? (4.0) 59 143 166 99 156 551 4497
ThePlatform 213 723 507 285 488 465 15712
DuckDB (multithread turned on) 63 153 360 23 322 330 878
DuckDB (multithread turned off) 347 690 601 108 440 528 3269

Dataset: G1_1e7_1e2_0_0.csv (10 million rows). Lower values (ms) are better.

RayforceDB demonstrates competitive performance across all 7 group by queries, consistently ranking among the top performers:

  • Q1-Q2: Rayforce (60-74ms) performs comparably to ClickHouse (51-189ms) and ? (4.0) (59-143ms)
  • Q3: At 118ms, Rayforce is 2.0x faster than DuckDB MT-off (601ms) and 2.0x faster than ? (4.0) (166ms)
  • Q4: Rayforce (72ms) is competitive, though ClickHouse leads at 47ms
  • Q5-Q6: Rayforce (122-104ms) shows strong performance, 2.1x faster than DuckDB MT-off on Q5 and 5.3x faster than ? (4.0) on Q6
  • Q7: Rayforce (1394ms) is 2.3x faster than ? (4.0) (4497ms) and 11.3x faster than ThePlatform (15712ms)

Rayforce Queries

(set t (csv [Symbol Symbol Symbol I64 I64 I64 I64 I64 F64] "./db-benchmark/G1_1e7_1e2_0_0.csv"))
(timeit (select {v1: (sum v1) from: t by: id1}))  ;; Q1
(timeit (select {v1: (sum v1) from: t by: {id1: id1 id2: id2}}))  ;; Q2
(timeit (select {v1: (sum v1) v3: (avg v3) from: t by: id3}))  ;; Q3
(timeit (select {v1: (avg v1) v2: (avg v2) v3: (avg v3) from: t by: id4}))  ;; Q4
(timeit (select {v1: (sum v1) v2: (sum v2) v3: (sum v3) from: t by: id6}))  ;; Q5
(timeit (select {range_v1_v2: (- (max v1) (min v2)) from: t by: id3}))  ;; Q6
(timeit (select {v3: (sum v3) count: (map count v3) from: t by: {id1: id1 id2: id2 id3: id3 id4: id4 id5: id5 id6: id6}}))  ;; Q7

DuckDB Queries

.timer "on"
create table t as SELECT * FROM read_csv('./db-benchmark/G1_1e7_1e2_0_0.csv');
select id1, sum(v1) AS v1 from t group by id1;  # Q1
select id1, id2, sum(v1) AS v1 from t group by id1, id2;  # Q2
select id3, sum(v1) AS v1, mean(v3) AS v3 from t group by id3;  # Q3
select id4, mean(v1) AS v1, mean(v2) AS v2, mean(v3) AS v3 from t group by id4;  # Q4
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6;  # Q5
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3;  # Q6
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6;  # Q7

ClickHouse Queries

CREATE TABLE t (id1 String, id2 String, id3 String, id4 Int64, id5 Int64, id6 Int64, v1 Int64, v2 Int64, v3 Float64) ENGINE = Memory;
clickhouse-client -q "INSERT INTO default.t FORMAT CSV" < ./db-benchmark/G1_1e7_1e2_0_0.csv
select id1, sum(v1) AS v1 from t group by id1;  # Q1
select id1, id2, sum(v1) AS v1 from t group by id1, id2;  # Q2
select id3, sum(v1) AS v1, avg(v3) AS v3 from t group by id3;  # Q3
select id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 from t group by id4;  # Q4
select id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 from t group by id6;  # Q5
select id3, max(v1)-min(v2) AS range_v1_v2 from t group by id3;  # Q6
select id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count(*) AS count from t group by id1, id2, id3, id4, id5, id6;  # Q7

? (4.0) Queries

t: ("SSSJJJJJF";enlist",") 0: hsym `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
select v1: sum v1 by id1 from t  // Q1
select v1: sum v1 by id1, id2 from t  // Q2
select v1: sum v1, v3: avg v3 by id3 from t  // Q3
select v1: avg v1, v2: avg v2, v3: avg v3 by id4 from t  // Q4
select v1: sum v1, v2: sum v2, v3: sum v3 by id6 from t  // Q5
select range_v1_v2: (max v1) - (min v2) by id3 from t  // Q6
select v3: sum v3, cnt: count i by id1, id2, id3, id4, id5, id6 from t  // Q7

ThePlatform Queries

t: ("SSSJJJJJF";enlist",") 0: `$":./db-benchmark/G1_1e7_1e2_0_0.csv"
0N#.?[t;();`id1!`id1;`v1!(sum;`v1)]
0N#.?[t;();`id1`id2!`id1`id2;`v1!(sum;`v1)]
0N#.?[t;();`id3!`id3;`v2`v3!((sum;`v2);(avg;`v3))]
0N#.?[t;();`id4!`id4;`v1`v2`v3!((avg;`v1);(avg;`v2);(avg;`v3))]
0N#.?[t;();`id6!`id6;`v1`v2`v3!((sum;`v1);(sum;`v2);(sum;`v3))]
0N#.?[?[t;();`id3!`id3;`v1`v2!((max;`v1);(min;`v2))];();0b;`id3`range_v1_v2!(`id3;(-;`v1;`v2))]
0N#.?[t;();`id1`id2`id3`id4`id5`id6!`id1`id2`id3`id4`id5`id6;`v3`count!((sum;`v3);(count;`v3))]