Convert 'x IN (VALUES ...)' to 'x = ANY ...' then appropriate

This commit implements the automatic conversion of 'x IN (VALUES ...)' into
ScalarArrayOpExpr.  That simplifies the query tree, eliminating the appearance
of an unnecessary join.

Since VALUES describes a relational table, and the value of such a list is
a table row, the optimizer will likely face an underestimation problem due to
the inability to estimate cardinality through MCV statistics.  The cardinality
evaluation mechanism can work with the array inclusion check operation.
If the array is small enough (< 100 elements), it will perform a statistical
evaluation element by element.

We perform the transformation in the convert_ANY_sublink_to_join() if VALUES
RTE is proper and the transformation is convertible.  The conversion is only
possible for operations on scalar values, not rows.  Also, we currently
support the transformation only when it ends up with a constant array.
Otherwise, the evaluation of non-hashed SAOP might be slower than the
corresponding Hash Join with VALUES.

Discussion: https://postgr.es/m/0184212d-1248-4f1f-a42d-f5cb1c1976d2%40tantorlabs.com
Author: Alena Rybakina <a.rybakina@postgrespro.ru>
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Ivan Kush <ivan.kush@tantorlabs.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
pull/210/head
Alexander Korotkov 3 months ago
parent d48d2e2dc8
commit c0962a113d
  1. 80
      src/backend/optimizer/plan/subselect.c
  2. 12
      src/backend/optimizer/prep/prepjointree.c
  3. 14
      src/backend/optimizer/util/clauses.c
  4. 3
      src/include/optimizer/subselect.h
  5. 308
      src/test/regress/expected/subselect.out
  6. 100
      src/test/regress/sql/subselect.sql

@ -1214,6 +1214,86 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
return expression_tree_walker(node, inline_cte_walker, context);
}
/*
* Attempt to transform 'testexpr' over the VALUES subquery into
* a ScalarArrayOpExpr. We currently support the transformation only when
* it ends up with a constant array. Otherwise, the evaluation of non-hashed
* SAOP might be slower than the corresponding Hash Join with VALUES.
*
* Return transformed ScalarArrayOpExpr or NULL if transformation isn't
* allowed.
*/
ScalarArrayOpExpr *
convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values)
{
RangeTblEntry *rte;
Node *leftop;
Node *rightop;
Oid opno;
ListCell *lc;
Oid inputcollid;
List *exprs = NIL;
/*
* Check we have a binary operator over a single-column subquery with no
* joins and no LIMIT/OFFSET/ORDER BY clauses.
*/
if (!IsA(testexpr, OpExpr) ||
list_length(((OpExpr *) testexpr)->args) != 2 ||
list_length(values->targetList) > 1 ||
values->limitCount != NULL ||
values->limitOffset != NULL ||
values->sortClause != NIL ||
list_length(values->rtable) != 1)
return NULL;
rte = linitial_node(RangeTblEntry, values->rtable);
leftop = linitial(((OpExpr *) testexpr)->args);
rightop = lsecond(((OpExpr *) testexpr)->args);
opno = ((OpExpr *) testexpr)->opno;
inputcollid = ((OpExpr *) testexpr)->inputcollid;
/*
* Also, check that only RTE corresponds to VALUES; the list of values has
* at least two items and no volatile functions.
*/
if (rte->rtekind != RTE_VALUES ||
list_length(rte->values_lists) < 2 ||
contain_volatile_functions((Node *) rte->values_lists))
return NULL;
foreach(lc, rte->values_lists)
{
List *elem = lfirst(lc);
Node *value = linitial(elem);
/*
* Prepare an evaluation of the right side of the operator with
* substitution of the given value.
*/
value = convert_testexpr(root, rightop, list_make1(value));
/*
* Try to evaluate constant expressions. We could get Const as a
* result.
*/
value = eval_const_expressions(root, value);
/*
* As we only support constant output arrays, all the items must also
* be constant.
*/
if (!IsA(value, Const))
return NULL;
exprs = lappend(exprs, value);
}
/* Finally, build ScalarArrayOpExpr at the top of the 'exprs' list. */
return make_SAOP_expr(opno, leftop, exprType(rightop),
linitial_oid(rte->colcollations), inputcollid,
exprs, false);
}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join

@ -664,6 +664,18 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Is it a convertible ANY or EXISTS clause? */
if (sublink->subLinkType == ANY_SUBLINK)
{
ScalarArrayOpExpr *saop;
if ((saop = convert_VALUES_to_ANY(root,
sublink->testexpr,
(Query *) sublink->subselect)) != NULL)
/*
* The VALUES sequence was simplified. Nothing more to do
* here.
*/
return (Node *) saop;
if ((j = convert_ANY_sublink_to_join(root, sublink,
available_rels1)) != NULL)
{

@ -5484,26 +5484,30 @@ make_SAOP_expr(Oid oper, Node *leftexpr, Oid coltype, Oid arraycollid,
bool typbyval;
char typalign;
Datum *elems;
bool *nulls;
int i = 0;
ArrayType *arrayConst;
int dims[1] = {list_length(exprs)};
int lbs[1] = {1};
get_typlenbyvalalign(coltype, &typlen, &typbyval, &typalign);
elems = (Datum *) palloc(sizeof(Datum) * list_length(exprs));
nulls = (bool *) palloc(sizeof(bool) * list_length(exprs));
foreach_node(Const, value, exprs)
{
Assert(!value->constisnull);
elems[i++] = value->constvalue;
elems[i] = value->constvalue;
nulls[i++] = value->constisnull;
}
arrayConst = construct_array(elems, i, coltype,
typlen, typbyval, typalign);
arrayConst = construct_md_array(elems, nulls, 1, dims, lbs,
coltype, typlen, typbyval, typalign);
arrayNode = (Node *) makeConst(arraytype, -1, arraycollid,
-1, PointerGetDatum(arrayConst),
false, false);
pfree(elems);
pfree(nulls);
list_free(exprs);
}

@ -17,6 +17,9 @@
#include "nodes/plannodes.h"
extern void SS_process_ctes(PlannerInfo *root);
extern ScalarArrayOpExpr *convert_VALUES_to_ANY(PlannerInfo *root,
Node *testexpr,
Query *values);
extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
SubLink *sublink,
Relids available_rels);

@ -2652,3 +2652,311 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
Filter: (odd = b.odd)
(16 rows)
--
-- Test VALUES to ARRAY (VtA) transformation
--
-- VtA transformation for joined VALUES is not supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek, (VALUES('RFAAAA'), ('VJAAAA')) AS v (i)
WHERE onek.stringu1 = v.i;
QUERY PLAN
-------------------------------------------------------------
Nested Loop
-> Values Scan on "*VALUES*"
-> Index Scan using onek_stringu1 on onek
Index Cond: (stringu1 = ("*VALUES*".column1)::text)
(4 rows)
-- VtA transformation for a composite argument is not supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
ORDER BY unique1;
QUERY PLAN
-----------------------------------------------------------------
Sort
Sort Key: onek.unique1
-> Nested Loop
-> HashAggregate
Group Key: "*VALUES*".column1, "*VALUES*".column2
-> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = "*VALUES*".column1)
Filter: ("*VALUES*".column2 = ten)
(9 rows)
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
ORDER BY unique1;
QUERY PLAN
----------------------------------------------------------------------------------------
Sort
Sort Key: unique1
-> Bitmap Heap Scan on onek
Recheck Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
-> Bitmap Index Scan on onek_unique1
Index Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
(6 rows)
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (VALUES(1200), (1));
QUERY PLAN
-------------------------------------------------------------
Bitmap Heap Scan on onek
Recheck Cond: (unique1 = ANY ('{1200,1}'::integer[]))
-> Bitmap Index Scan on onek_unique1
Index Cond: (unique1 = ANY ('{1200,1}'::integer[]))
(4 rows)
-- Recursive evaluation of constant queries is not yet supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (SELECT x * x FROM (VALUES(1200), (1)) AS x(x));
QUERY PLAN
---------------------------------------------------------------------------
Nested Loop
-> Unique
-> Sort
Sort Key: (("*VALUES*".column1 * "*VALUES*".column1))
-> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = ("*VALUES*".column1 * "*VALUES*".column1))
(7 rows)
EXPLAIN (COSTS OFF)
SELECT unique1, stringu1 FROM onek WHERE stringu1::name IN (VALUES('RFAAAA'), ('VJAAAA'));
QUERY PLAN
------------------------------------------------------------------
Bitmap Heap Scan on onek
Recheck Cond: (stringu1 = ANY ('{RFAAAA,VJAAAA}'::text[]))
-> Bitmap Index Scan on onek_stringu1
Index Cond: (stringu1 = ANY ('{RFAAAA,VJAAAA}'::text[]))
(4 rows)
EXPLAIN (COSTS OFF)
SELECT unique1, stringu1 FROM onek WHERE stringu1::text IN (VALUES('RFAAAA'), ('VJAAAA'));
QUERY PLAN
----------------------------------------------------------------
Seq Scan on onek
Filter: ((stringu1)::text = ANY ('{RFAAAA,VJAAAA}'::text[]))
(2 rows)
EXPLAIN (COSTS OFF)
SELECT * from onek WHERE unique1 in (VALUES(1200::bigint), (1));
QUERY PLAN
------------------------------------------------------------
Bitmap Heap Scan on onek
Recheck Cond: (unique1 = ANY ('{1200,1}'::bigint[]))
-> Bitmap Index Scan on onek_unique1
Index Cond: (unique1 = ANY ('{1200,1}'::bigint[]))
(4 rows)
-- VtA shouldn't depend on the side of the join probing with the VALUES expression.
EXPLAIN (COSTS OFF)
SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
WHERE a.oid IN (VALUES (1), (2));
QUERY PLAN
---------------------------------------------------------
Nested Loop
-> Seq Scan on pg_am a
Filter: (oid = ANY ('{1,2}'::oid[]))
-> Index Scan using pg_class_oid_index on pg_class c
Index Cond: (oid = a.oid)
(5 rows)
EXPLAIN (COSTS OFF)
SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
WHERE c.oid IN (VALUES (1), (2));
QUERY PLAN
---------------------------------------------------------------
Hash Join
Hash Cond: (a.oid = c.oid)
-> Seq Scan on pg_am a
-> Hash
-> Index Scan using pg_class_oid_index on pg_class c
Index Cond: (oid = ANY ('{1,2}'::oid[]))
(6 rows)
-- Constant expressions are simplified
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE sin(two ) +four IN (VALUES (sin(0.5)), (2));
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Seq Scan on onek
Filter: ((sin((two)::double precision) + (four)::double precision) = ANY ('{0.479425538604203,2}'::double precision[]))
(2 rows)
EXPLAIN (COSTS OFF)
-- VtA allows NULLs in the list
SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (NULL), (2));
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
Seq Scan on onek
Filter: ((sin((two)::double precision) + (four)::double precision) = ANY ('{0.479425538604203,NULL,2}'::double precision[]))
(2 rows)
-- VtA is supported for custom plans where params are substituted with
-- constants. VtA is not supported with generic plans where params prevent
-- us from building a constant array.
PREPARE test (int, numeric, text) AS
SELECT ten FROM onek WHERE sin(two) * four / ($3::real) IN (VALUES (sin($2)), (2), ($1));
EXPLAIN (COSTS OFF) EXECUTE test(42, 3.14, '-1.5');
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on onek
Filter: (((sin((two)::double precision) * (four)::double precision) / '-1.5'::real) = ANY ('{0.0015926529164868282,2,42}'::double precision[]))
(2 rows)
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, NULL);
QUERY PLAN
--------------------------
Result
One-Time Filter: false
(2 rows)
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on onek
Filter: (((sin((two)::double precision) * (four)::double precision) / '-1.5'::real) = ANY ('{0.0015926529164868282,2,NULL}'::double precision[]))
(2 rows)
SET plan_cache_mode = 'force_generic_plan';
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Hash Semi Join
Hash Cond: (((sin((onek.two)::double precision) * (onek.four)::double precision) / ($3)::real) = "*VALUES*".column1)
-> Seq Scan on onek
-> Hash
-> Values Scan on "*VALUES*"
(5 rows)
RESET plan_cache_mode;
-- VtA doesn't support LIMIT, OFFSET, and ORDER BY clauses
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) OFFSET 1);
QUERY PLAN
----------------------------------------------------
Nested Loop
-> HashAggregate
Group Key: "*VALUES*".column1
-> Limit
-> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = "*VALUES*".column1)
(7 rows)
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
QUERY PLAN
----------------------------------------------------
Nested Loop
-> Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Sort
Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = "*VALUES*".column1)
(9 rows)
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
QUERY PLAN
----------------------------------------------------
Nested Loop
-> HashAggregate
Group Key: "*VALUES*".column1
-> Limit
-> Values Scan on "*VALUES*"
-> Index Scan using onek_unique1 on onek
Index Cond: (unique1 = "*VALUES*".column1)
(7 rows)
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t
WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
WHERE c.unique2 = t.unique1))::integer));
QUERY PLAN
------------------------------------------------------------
Nested Loop Semi Join
-> Seq Scan on onek t
-> Values Scan on "*VALUES*"
Filter: (t.unique1 = column1)
SubPlan 1
-> Index Only Scan using onek_unique2 on onek c
Index Cond: (unique2 = t.unique1)
(7 rows)
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t
WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
WHERE c.unique2 IN (VALUES (sin(0.5)), (2))))::integer));
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Nested Loop
-> Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*"
SubPlan 1
-> Index Only Scan using onek_unique2 on onek c
Filter: ((unique2)::double precision = ANY ('{0.479425538604203,2}'::double precision[]))
-> Index Scan using onek_unique1 on onek t
Index Cond: (unique1 = "*VALUES*".column1)
(10 rows)
-- VtA is not allowed with subqueries
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN
(SELECT (3)))::integer)
);
QUERY PLAN
----------------------------------------------------
Nested Loop
-> Unique
-> Sort
Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*"
SubPlan 1
-> Result
-> Index Scan using onek_unique1 on onek t
Index Cond: (unique1 = "*VALUES*".column1)
(9 rows)
-- VtA is not allowed with non-constant expressions
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), (unique2));
QUERY PLAN
---------------------------------------
Nested Loop Semi Join
-> Seq Scan on onek t
-> Values Scan on "*VALUES*"
Filter: (t.unique1 = column1)
(4 rows)
EXPLAIN (COSTS OFF)
SELECT * FROM onek t1, lateral (SELECT * FROM onek t2 WHERE t2.ten IN (values (t1.ten), (1)));
QUERY PLAN
--------------------------------------------------
Nested Loop
-> Seq Scan on onek t1
-> Hash Semi Join
Hash Cond: (t2.ten = "*VALUES*".column1)
-> Seq Scan on onek t2
-> Hash
-> Values Scan on "*VALUES*"
(7 rows)
-- VtA causes the whole expression to be evaluated as a constant
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
QUERY PLAN
--------------------
Seq Scan on onek t
(1 row)

@ -1202,3 +1202,103 @@ WHERE a.thousand < 750;
explain (costs off)
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
--
-- Test VALUES to ARRAY (VtA) transformation
--
-- VtA transformation for joined VALUES is not supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek, (VALUES('RFAAAA'), ('VJAAAA')) AS v (i)
WHERE onek.stringu1 = v.i;
-- VtA transformation for a composite argument is not supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
ORDER BY unique1;
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
ORDER BY unique1;
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (VALUES(1200), (1));
-- Recursive evaluation of constant queries is not yet supported
EXPLAIN (COSTS OFF)
SELECT * FROM onek
WHERE unique1 IN (SELECT x * x FROM (VALUES(1200), (1)) AS x(x));
EXPLAIN (COSTS OFF)
SELECT unique1, stringu1 FROM onek WHERE stringu1::name IN (VALUES('RFAAAA'), ('VJAAAA'));
EXPLAIN (COSTS OFF)
SELECT unique1, stringu1 FROM onek WHERE stringu1::text IN (VALUES('RFAAAA'), ('VJAAAA'));
EXPLAIN (COSTS OFF)
SELECT * from onek WHERE unique1 in (VALUES(1200::bigint), (1));
-- VtA shouldn't depend on the side of the join probing with the VALUES expression.
EXPLAIN (COSTS OFF)
SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
WHERE a.oid IN (VALUES (1), (2));
EXPLAIN (COSTS OFF)
SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
WHERE c.oid IN (VALUES (1), (2));
-- Constant expressions are simplified
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE sin(two ) +four IN (VALUES (sin(0.5)), (2));
EXPLAIN (COSTS OFF)
-- VtA allows NULLs in the list
SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (NULL), (2));
-- VtA is supported for custom plans where params are substituted with
-- constants. VtA is not supported with generic plans where params prevent
-- us from building a constant array.
PREPARE test (int, numeric, text) AS
SELECT ten FROM onek WHERE sin(two) * four / ($3::real) IN (VALUES (sin($2)), (2), ($1));
EXPLAIN (COSTS OFF) EXECUTE test(42, 3.14, '-1.5');
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, NULL);
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
SET plan_cache_mode = 'force_generic_plan';
EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
RESET plan_cache_mode;
-- VtA doesn't support LIMIT, OFFSET, and ORDER BY clauses
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) OFFSET 1);
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
EXPLAIN (COSTS OFF)
SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t
WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
WHERE c.unique2 = t.unique1))::integer));
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t
WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
WHERE c.unique2 IN (VALUES (sin(0.5)), (2))))::integer));
-- VtA is not allowed with subqueries
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN
(SELECT (3)))::integer)
);
-- VtA is not allowed with non-constant expressions
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), (unique2));
EXPLAIN (COSTS OFF)
SELECT * FROM onek t1, lateral (SELECT * FROM onek t2 WHERE t2.ten IN (values (t1.ten), (1)));
-- VtA causes the whole expression to be evaluated as a constant
EXPLAIN (COSTS OFF)
SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));

Loading…
Cancel
Save