Allow defining extended statistics on expressions, not just just on
simple column references. With this commit, expressions are supported
by all existing extended statistics kinds, improving the same types of
estimates. A simple example may look like this:
CREATE TABLE t (a int);
CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t;
ANALYZE t;
The collected statistics are useful e.g. to estimate queries with those
expressions in WHERE or GROUP BY clauses:
SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0;
SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20);
This introduces new internal statistics kind 'e' (expressions) which is
built automatically when the statistics object definition includes any
expressions. This represents single-expression statistics, as if there
was an expression index (but without the index maintenance overhead).
The statistics is stored in pg_statistics_ext_data as an array of
composite types, which is possible thanks to 79f6a942bd.
CREATE STATISTICS allows building statistics on a single expression, in
which case in which case it's not possible to specify statistics kinds.
A new system view pg_stats_ext_exprs can be used to display expression
statistics, similarly to pg_stats and pg_stats_ext views.
ALTER TABLE ... ALTER COLUMN ... TYPE now treats indexes the same way it
treats indexes, i.e. it drops and recreates the statistics. This means
all statistics are reset, and we no longer try to preserve at least the
functional dependencies. This should not be a major issue in practice,
as the functional dependencies actually rely on per-column statistics,
which were always reset anyway.
Author: Tomas Vondra
Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu
Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
catalogs. This view allows access only to rows of
<link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link> and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
that correspond to tables the user has permission to read, and therefore
it is safe to allow public read access to this view.
</para>
<para>
<structname>pg_stats_ext_exprs</structname> is also designed to present
the information in a more readable format than the underlying catalogs
— at the cost that its schema must be extended whenever the structure
of statistics in <link linkend="catalog-pg-statistic"><structname>pg_statistic</structname></link> changes.
@ -304,7 +304,9 @@ CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) PRIMARY KEY, b text);
CREATE INDEX ctlt1_b_key ON ctlt1 (b);
CREATE INDEX ctlt1_b_key ON ctlt1 (b);
CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b));
CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b));
CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1;
CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1;
CREATE STATISTICS ctlt1_expr_stat ON (a || b) FROM ctlt1;
COMMENT ON STATISTICS ctlt1_a_b_stat IS 'ab stats';
COMMENT ON STATISTICS ctlt1_a_b_stat IS 'ab stats';
COMMENT ON STATISTICS ctlt1_expr_stat IS 'ab expr stats';
COMMENT ON COLUMN ctlt1.a IS 'A';
COMMENT ON COLUMN ctlt1.a IS 'A';
COMMENT ON COLUMN ctlt1.b IS 'B';
COMMENT ON COLUMN ctlt1.b IS 'B';
COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check';
COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check';
@ -414,7 +416,8 @@ Indexes:
Check constraints:
Check constraints:
"ctlt1_a_check" CHECK (length(a) > 2)
"ctlt1_a_check" CHECK (length(a) > 2)
Statistics objects:
Statistics objects:
"public"."ctlt_all_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM ctlt_all
"public"."ctlt_all_a_b_stat" ON a, b FROM ctlt_all
"public"."ctlt_all_expr_stat" ON ((a || b)) FROM ctlt_all
SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_class c WHERE classoid = 'pg_class'::regclass AND objoid = i.indexrelid AND c.oid = i.indexrelid AND i.indrelid = 'ctlt_all'::regclass ORDER BY c.relname, objsubid;
SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_class c WHERE classoid = 'pg_class'::regclass AND objoid = i.indexrelid AND c.oid = i.indexrelid AND i.indrelid = 'ctlt_all'::regclass ORDER BY c.relname, objsubid;
relname | objsubid | description
relname | objsubid | description
@ -424,10 +427,11 @@ SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_clas
(2 rows)
(2 rows)
SELECT s.stxname, objsubid, description FROM pg_description, pg_statistic_ext s WHERE classoid = 'pg_statistic_ext'::regclass AND objoid = s.oid AND s.stxrelid = 'ctlt_all'::regclass ORDER BY s.stxname, objsubid;
SELECT s.stxname, objsubid, description FROM pg_description, pg_statistic_ext s WHERE classoid = 'pg_statistic_ext'::regclass AND objoid = s.oid AND s.stxrelid = 'ctlt_all'::regclass ORDER BY s.stxname, objsubid;
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
DROPSTATISTICSfunc_deps_stat;
-- now try functional dependencies with expressions
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
-- IN
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
-- OR clauses referencing the same attribute
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
-- OR clauses referencing different attributes
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
-- ANY
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
-- ANY with inequalities should not benefit from functional dependencies
-- the estimates however improve thanks to having expression statistics
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
-- ALL (should not benefit from functional dependencies)
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
-- IN
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
-- OR clauses referencing the same attribute
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
-- OR clauses referencing different attributes
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
-- ANY
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
-- ANY with inequalities should not benefit from functional dependencies
-- the estimates however improve thanks to having expression statistics
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
-- ALL (should not benefit from functional dependencies)
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
-- check the ability to use multiple functional dependencies
-- check the ability to use multiple functional dependencies
CREATETABLEfunctional_dependencies_multi(
CREATETABLEfunctional_dependencies_multi(
aINTEGER,
aINTEGER,
@ -479,6 +889,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
TRUNCATEmcv_lists;
DROPSTATISTICSmcv_lists_stats;
-- random data (no MCV list), but with expression
INSERTINTOmcv_lists(a,b,c,filler1)
SELECTi,i,i,iFROMgenerate_series(1,1000)s(i);
ANALYZEmcv_lists;
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
-- 100 distinct combinations, all in the MCV list
-- 100 distinct combinations, all in the MCV list
TRUNCATEmcv_lists;
TRUNCATEmcv_lists;
DROPSTATISTICSmcv_lists_stats;
DROPSTATISTICSmcv_lists_stats;
@ -565,6 +997,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@ -602,6 +1036,98 @@ ANALYZE mcv_lists;
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
-- 100 distinct combinations, all in the MCV list, but with expressions
TRUNCATEmcv_lists;
DROPSTATISTICSmcv_lists_stats;
INSERTINTOmcv_lists(a,b,c,filler1)
SELECTi,i,i,iFROMgenerate_series(1,1000)s(i);
ANALYZEmcv_lists;
-- without any stats on the expressions, we have to use default selectivities, which
-- is why the estimates here are different from the pre-computed case above
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats)
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
DROPSTATISTICSmcv_lists_stats_1;
DROPSTATISTICSmcv_lists_stats_2;
DROPSTATISTICSmcv_lists_stats_3;
-- create statistics with both MCV and expressions
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
SELECT*FROMcheck_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL');
-- 100 distinct combinations with NULL values, all in the MCV list
-- 100 distinct combinations with NULL values, all in the MCV list
TRUNCATEmcv_lists;
TRUNCATEmcv_lists;
DROPSTATISTICSmcv_lists_stats;
DROPSTATISTICSmcv_lists_stats;
@ -894,6 +1420,57 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR