This extends the Aggregate node with two new features: HashAggregate
can now run multiple hashtables concurrently, and a new strategy
MixedAggregate populates hashtables while doing sorted grouping.
The planner will now attempt to save as many sorts as possible when
planning grouping sets queries, while not exceeding work_mem for the
estimated combined sizes of all hashtables used. No SQL-level changes
are required. There should be no user-visible impact other than the
new EXPLAIN output and possible changes to result ordering when ORDER
BY was not used (which affected a few regression tests). The
enable_hashagg option is respected.
Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1));
@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1;
2500
(6 rows)
-- hashing support
set enable_hashagg = true;
-- failure cases
select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col);
ERROR: could not implement GROUP BY
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id));
ERROR: could not implement GROUP BY
DETAIL: Some of the datatypes only support hashing, while others only support sorting.
-- simple cases
select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
a | b | grouping | sum | count | max
---+---+----------+-----+-------+-----
1 | | 1 | 60 | 5 | 14
2 | | 1 | 15 | 1 | 15
3 | | 1 | 33 | 2 | 17
4 | | 1 | 37 | 2 | 19
| 1 | 2 | 58 | 4 | 19
| 2 | 2 | 25 | 2 | 13
| 3 | 2 | 45 | 3 | 16
| 4 | 2 | 17 | 1 | 17
(8 rows)
explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v)
from gstest1 group by grouping sets ((a),(b)) order by 3,1,2;
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
a | b | sum | rsum
---+---+-----+------
1 | 1 | 8 | 8
1 | 2 | 2 | 10
1 | | 10 | 20
2 | 2 | 2 | 22
2 | | 2 | 24
| 1 | 8 | 32
| 2 | 4 | 36
| | 12 | 48
(8 rows)
explain (costs off)
select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum
from gstest2 group by cube (a,b) order by rsum, a, b;
QUERY PLAN
---------------------------------------------
Sort
Sort Key: (sum((sum(c))) OVER (?)), a, b
-> WindowAgg
-> Sort
Sort Key: a, b
-> MixedAggregate
Hash Key: a, b
Hash Key: a
Hash Key: b
Group Key: ()
-> Seq Scan on gstest2
(11 rows)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
a | b | sum
---+---+-----
1 | 1 | 1
1 | 2 | 1
1 | 3 | 1
1 | | 3
2 | 1 | 2
2 | 2 | 2
2 | 3 | 2
2 | | 6
| 1 | 3
| 2 | 3
| 3 | 3
| | 9
(12 rows)
explain (costs off)
select a, b, sum(v.x)
from (values (1),(2)) v(x), gstest_data(v.x)
group by cube (a,b) order by a,b;
QUERY PLAN
------------------------------------------------
Sort
Sort Key: gstest_data.a, gstest_data.b
-> MixedAggregate
Hash Key: gstest_data.a, gstest_data.b
Hash Key: gstest_data.a
Hash Key: gstest_data.b
Group Key: ()
-> Nested Loop
-> Values Scan on "*VALUES*"
-> Function Scan on gstest_data
(10 rows)
-- More rescan tests
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten;
a | a | four | ten | count
---+---+------+-----+-------
1 | 1 | 0 | 0 | 50
1 | 1 | 0 | 2 | 50
1 | 1 | 0 | 4 | 50
1 | 1 | 0 | 6 | 50
1 | 1 | 0 | 8 | 50
1 | 1 | 0 | | 250
1 | 1 | 1 | 1 | 50
1 | 1 | 1 | 3 | 50
1 | 1 | 1 | 5 | 50
1 | 1 | 1 | 7 | 50
1 | 1 | 1 | 9 | 50
1 | 1 | 1 | | 250
1 | 1 | 2 | 0 | 50
1 | 1 | 2 | 2 | 50
1 | 1 | 2 | 4 | 50
1 | 1 | 2 | 6 | 50
1 | 1 | 2 | 8 | 50
1 | 1 | 2 | | 250
1 | 1 | 3 | 1 | 50
1 | 1 | 3 | 3 | 50
1 | 1 | 3 | 5 | 50
1 | 1 | 3 | 7 | 50
1 | 1 | 3 | 9 | 50
1 | 1 | 3 | | 250
1 | 1 | | 0 | 100
1 | 1 | | 1 | 100
1 | 1 | | 2 | 100
1 | 1 | | 3 | 100
1 | 1 | | 4 | 100
1 | 1 | | 5 | 100
1 | 1 | | 6 | 100
1 | 1 | | 7 | 100
1 | 1 | | 8 | 100
1 | 1 | | 9 | 100
1 | 1 | | | 1000
2 | 2 | 0 | 0 | 50
2 | 2 | 0 | 2 | 50
2 | 2 | 0 | 4 | 50
2 | 2 | 0 | 6 | 50
2 | 2 | 0 | 8 | 50
2 | 2 | 0 | | 250
2 | 2 | 1 | 1 | 50
2 | 2 | 1 | 3 | 50
2 | 2 | 1 | 5 | 50
2 | 2 | 1 | 7 | 50
2 | 2 | 1 | 9 | 50
2 | 2 | 1 | | 250
2 | 2 | 2 | 0 | 50
2 | 2 | 2 | 2 | 50
2 | 2 | 2 | 4 | 50
2 | 2 | 2 | 6 | 50
2 | 2 | 2 | 8 | 50
2 | 2 | 2 | | 250
2 | 2 | 3 | 1 | 50
2 | 2 | 3 | 3 | 50
2 | 2 | 3 | 5 | 50
2 | 2 | 3 | 7 | 50
2 | 2 | 3 | 9 | 50
2 | 2 | 3 | | 250
2 | 2 | | 0 | 100
2 | 2 | | 1 | 100
2 | 2 | | 2 | 100
2 | 2 | | 3 | 100
2 | 2 | | 4 | 100
2 | 2 | | 5 | 100
2 | 2 | | 6 | 100
2 | 2 | | 7 | 100
2 | 2 | | 8 | 100
2 | 2 | | 9 | 100
2 | 2 | | | 1000
(70 rows)
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a);
-- Rescan logic changes when there are no empty grouping sets, so test
-- that too:
select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten;
a | a | four | ten | count
---+---+------+-----+-------
1 | 1 | 0 | | 250
1 | 1 | 1 | | 250
1 | 1 | 2 | | 250
1 | 1 | 3 | | 250
1 | 1 | | 0 | 100
1 | 1 | | 1 | 100
1 | 1 | | 2 | 100
1 | 1 | | 3 | 100
1 | 1 | | 4 | 100
1 | 1 | | 5 | 100
1 | 1 | | 6 | 100
1 | 1 | | 7 | 100
1 | 1 | | 8 | 100
1 | 1 | | 9 | 100
2 | 2 | 0 | | 250
2 | 2 | 1 | | 250
2 | 2 | 2 | | 250
2 | 2 | 3 | | 250
2 | 2 | | 0 | 100
2 | 2 | | 1 | 100
2 | 2 | | 2 | 100
2 | 2 | | 3 | 100
2 | 2 | | 4 | 100
2 | 2 | | 5 | 100
2 | 2 | | 6 | 100
2 | 2 | | 7 | 100
2 | 2 | | 8 | 100
2 | 2 | | 9 | 100
(28 rows)
select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a);