Fix semijoin unique-ification for child relations

For a child relation, we should not assume that its parent's
unique-ified relation (or unique-ified path in v18) always exists.  In
cases where all RHS columns that need to be unique-ified are equated
to constants, the unique-ified relation/path for the parent table is
not built, as there are no columns left to unique-ify.  Failing to
account for this can result in a SIGSEGV crash during planning.

This patch checks whether the parent's unique-ified relation or path
exists and skips unique-ification of the child relation if it does
not.

Author: Richard Guo <guofenglinux@gmail.com>
Discussion: https://postgr.es/m/CAMbWs49MOdLW2c+qbLHHBt8VBu=4ONpM91D19=AWeW93eFUF6A@mail.gmail.com
Backpatch-through: 18
master
Richard Guo 2 weeks ago
parent fabd8b8e2a
commit 97b0f36bde
  1. 9
      src/backend/optimizer/plan/planner.c
  2. 60
      src/test/regress/expected/join.out
  3. 37
      src/test/regress/sql/join.sql

@ -8307,6 +8307,15 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo)
if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
return NULL;
/*
* Punt if this is a child relation and we failed to build a unique-ified
* relation for its parent. This can happen if all the RHS columns were
* found to be equated to constants when unique-ifying the parent table,
* leaving no columns to unique-ify.
*/
if (IS_OTHER_REL(rel) && rel->top_parent->unique_rel == NULL)
return NULL;
/*
* When called during GEQO join planning, we are in a short-lived memory
* context. We must make sure that the unique rel and any subsidiary data

@ -6580,6 +6580,66 @@ where exists (select 1 from t t4
1
(1 row)
rollback;
-- check handling of semijoins if all RHS columns are equated to constants: we
-- should suppress unique-ification in this case.
begin;
create temp table t (a int, b int);
insert into t values (1, 2);
explain (costs off)
select * from t t1, t t2 where exists
(select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2);
QUERY PLAN
---------------------------------------------
Nested Loop Semi Join
-> Nested Loop
-> Seq Scan on t t1
Filter: (a = 1)
-> Materialize
-> Seq Scan on t t2
Filter: (b = 2)
-> Materialize
-> Seq Scan on t t3
Filter: ((a = 1) AND (b = 2))
(10 rows)
select * from t t1, t t2 where exists
(select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2);
a | b | a | b
---+---+---+---
1 | 2 | 1 | 2
(1 row)
rollback;
-- check handling of semijoin unique-ification for child relations if all RHS
-- columns are equated to constants.
begin;
create temp table p (a int, b int) partition by range (a);
create temp table p1 partition of p for values from (0) to (10);
create temp table p2 partition of p for values from (10) to (20);
insert into p values (1, 2);
insert into p values (10, 20);
set enable_partitionwise_join to on;
explain (costs off)
select * from p t1 where exists
(select 1 from p t2 where t1.a = t2.a and t1.a = 1);
QUERY PLAN
-------------------------------
Nested Loop Semi Join
-> Seq Scan on p1 t1
Filter: (a = 1)
-> Materialize
-> Seq Scan on p1 t2
Filter: (a = 1)
(6 rows)
select * from p t1 where exists
(select 1 from p t2 where t1.a = t2.a and t1.a = 1);
a | b
---+---
1 | 2
(1 row)
rollback;
-- test cases where we can remove a join, but not a PHV computed at it
begin;

@ -2453,6 +2453,43 @@ where exists (select 1 from t t4
rollback;
-- check handling of semijoins if all RHS columns are equated to constants: we
-- should suppress unique-ification in this case.
begin;
create temp table t (a int, b int);
insert into t values (1, 2);
explain (costs off)
select * from t t1, t t2 where exists
(select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2);
select * from t t1, t t2 where exists
(select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2);
rollback;
-- check handling of semijoin unique-ification for child relations if all RHS
-- columns are equated to constants.
begin;
create temp table p (a int, b int) partition by range (a);
create temp table p1 partition of p for values from (0) to (10);
create temp table p2 partition of p for values from (10) to (20);
insert into p values (1, 2);
insert into p values (10, 20);
set enable_partitionwise_join to on;
explain (costs off)
select * from p t1 where exists
(select 1 from p t2 where t1.a = t2.a and t1.a = 1);
select * from p t1 where exists
(select 1 from p t2 where t1.a = t2.a and t1.a = 1);
rollback;
-- test cases where we can remove a join, but not a PHV computed at it
begin;

Loading…
Cancel
Save