diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3f506ae3b06..ee116790412 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -19,6 +19,7 @@ #include "catalog/pg_operator.h" #include "executor/executor.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -1093,6 +1094,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) Assert(list_length(colnos) == list_length(opids)); + /* + * A set-returning function in the query's targetlist can result in + * returning duplicate rows, if the SRF is evaluated after the + * de-duplication step; so we play it safe and say "no" if there are any + * SRFs. (We could be certain that it's okay if SRFs appear only in the + * specified columns, since those must be evaluated before de-duplication; + * but it doesn't presently seem worth the complication to check that.) + */ + if (expression_returns_set((Node *) query->targetList)) + return false; + /* * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the * columns in the DISTINCT clause appear in colnos and operator semantics diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 6194d259a10..c9d80bc045a 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -650,3 +650,15 @@ select * from int4_tbl where 0 (1 row) +-- +-- Check for incorrect optimization when IN subquery contains a SRF +-- +set enable_hashjoin to 0; +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); + f1 +---- + 0 +(1 row) + +reset enable_hashjoin; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 33b894c2b5e..a748889b23d 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -396,3 +396,11 @@ where a.thousand = b.thousand select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in (select ten from tenk1 b); + +-- +-- Check for incorrect optimization when IN subquery contains a SRF +-- +set enable_hashjoin to 0; +select * from int4_tbl o where (f1, f1) in + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); +reset enable_hashjoin;