diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 6c35baceedb..78a307cc523 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1588,6 +1588,17 @@ aggregation. Pushing partial aggregation in this case may result in the rows being grouped differently than expected, or produce incorrect values from the aggregate functions. +Semi joins and anti joins impose a similar restriction. Such a join +does not preserve its inner rows in the join output, so a partial +aggregate computed on the inner side would not survive the join and +could not be combined by the final aggregation. We therefore do not +push partial aggregation down to the inner side of a semi/anti join. +(An anti join reduced from an outer join null-extends its inner side, +so that inner relation is already excluded by the outer-join condition +above; the case specifically addressed here is a semi/anti join that +does not null-extend its inner side, such as one formed from an +EXISTS, IN, NOT EXISTS, or NOT IN sublink.) + During the construction of the join tree, we evaluate each base or join relation to determine if eager aggregation can be applied. If feasible, we create a separate RelOptInfo called a "grouped relation" diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 3fc2c2f71d0..687e923c46c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -2845,6 +2845,32 @@ eager_aggregation_possible_for_relation(PlannerInfo *root, RelOptInfo *rel) return false; } + /* + * Similarly, we cannot push a partial aggregation down to a relation on + * the inner (RHS) side of a semi/anti join. A semi/anti join does not + * preserve its inner rows in the join output, so a partial aggregate + * computed on the inner side would not survive the join and could not be + * combined by the final aggregation. 
+ * + * Note that an anti join reduced from an outer join null-extends its + * inner side, so that inner relation already carries nulling_relids and + * is handled by the outer-join check above. The case this check adds is + * a semi/anti join that does not null-extend its inner side, such as one + * formed from an EXISTS, IN, NOT EXISTS, or NOT IN sublink. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = lfirst_node(SpecialJoinInfo, lc); + + if (sjinfo->jointype != JOIN_SEMI && sjinfo->jointype != JOIN_ANTI) + continue; + + /* rel has inner rels of this join but not all of its outer side */ + if (bms_overlap(rel->relids, sjinfo->min_righthand) && + !bms_is_subset(sjinfo->min_lefthand, rel->relids)) + return false; + } + /* * For now we don't try to support PlaceHolderVars. */ diff --git a/src/test/regress/expected/eager_aggregate.out b/src/test/regress/expected/eager_aggregate.out index 456d32eb13d..091ae48a92b 100644 --- a/src/test/regress/expected/eager_aggregate.out +++ b/src/test/regress/expected/eager_aggregate.out @@ -466,6 +466,96 @@ GROUP BY t1.a ORDER BY t1.a; -> Seq Scan on eager_agg_t1 t1 (9 rows) +-- Eager aggregation must not push a partial aggregate onto the inner side of a +-- SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + QUERY PLAN +------------------------------------------------------------ + Sort + Output: t2.b, (count(*)) + Sort Key: t2.b + -> HashAggregate + Output: t2.b, count(*) + Group Key: t2.b + -> Hash Anti Join + Output: t2.b + Hash Cond: (t2.a = t3.a) + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c + -> Hash + Output: t3.a + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a +(15 rows) + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + b | count 
+---+------- + 0 | 100 + 1 | 99 + 2 | 99 + 3 | 99 + 4 | 99 + 5 | 99 + 6 | 99 + 7 | 99 + 8 | 99 + 9 | 99 +(10 rows) + +-- Eager aggregation may still push a partial aggregate onto the outer side of +-- a SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Output: t2.b, count(*) + Group Key: t2.b + -> Sort + Output: t2.b, (PARTIAL count(*)) + Sort Key: t2.b + -> Hash Right Semi Join + Output: t2.b, (PARTIAL count(*)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2.b, PARTIAL count(*) + Group Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(18 rows) + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + b | count +---+------- + 1 | 100 + 2 | 100 + 3 | 100 + 4 | 100 + 5 | 100 + 6 | 100 + 7 | 100 + 8 | 100 + 9 | 100 +(9 rows) + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3; diff --git a/src/test/regress/sql/eager_aggregate.sql b/src/test/regress/sql/eager_aggregate.sql index 53d9b377a64..7bca9c524da 100644 --- a/src/test/regress/sql/eager_aggregate.sql +++ b/src/test/regress/sql/eager_aggregate.sql @@ -177,6 +177,32 @@ SELECT t1.a, avg(t2.c) FILTER (WHERE random() > 0.5) JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +-- Eager aggregation must not push a partial aggregate onto the inner side of a +-- SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 
+ WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + +-- Eager aggregation may still push a partial aggregate onto the outer side of +-- a SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3;