You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
postgres/src/test/regress/sql/partition_prune.sql

1190 lines
51 KiB

--
-- Test partitioning planner code
--
-- Force generic plans to be used for all prepared statements in this file.
set plan_cache_mode = force_generic_plan;
create table lp (a char) partition by list (a);
create table lp_default partition of lp default;
create table lp_ef partition of lp for values in ('e', 'f');
create table lp_ad partition of lp for values in ('a', 'd');
create table lp_bc partition of lp for values in ('b', 'c');
create table lp_g partition of lp for values in ('g');
create table lp_null partition of lp for values in (null);
explain (costs off) select * from lp;
explain (costs off) select * from lp where a > 'a' and a < 'd';
explain (costs off) select * from lp where a > 'a' and a <= 'd';
explain (costs off) select * from lp where a = 'a';
explain (costs off) select * from lp where 'a' = a; /* commuted */
explain (costs off) select * from lp where a is not null;
explain (costs off) select * from lp where a is null;
explain (costs off) select * from lp where a = 'a' or a = 'c';
explain (costs off) select * from lp where a is not null and (a = 'a' or a = 'c');
explain (costs off) select * from lp where a <> 'g';
explain (costs off) select * from lp where a <> 'a' and a <> 'd';
explain (costs off) select * from lp where a not in ('a', 'd');
-- collation matches the partitioning collation, pruning works
create table coll_pruning (a text collate "C") partition by list (a);
create table coll_pruning_a partition of coll_pruning for values in ('a');
create table coll_pruning_b partition of coll_pruning for values in ('b');
create table coll_pruning_def partition of coll_pruning default;
explain (costs off) select * from coll_pruning where a collate "C" = 'a' collate "C";
-- collation doesn't match the partitioning collation, no pruning occurs
explain (costs off) select * from coll_pruning where a collate "POSIX" = 'a' collate "POSIX";
create table rlp (a int, b varchar) partition by range (a);
create table rlp_default partition of rlp default partition by list (a);
create table rlp_default_default partition of rlp_default default;
create table rlp_default_10 partition of rlp_default for values in (10);
create table rlp_default_30 partition of rlp_default for values in (30);
create table rlp_default_null partition of rlp_default for values in (null);
create table rlp1 partition of rlp for values from (minvalue) to (1);
create table rlp2 partition of rlp for values from (1) to (10);
create table rlp3 (b varchar, a int) partition by list (b varchar_ops);
create table rlp3_default partition of rlp3 default;
create table rlp3abcd partition of rlp3 for values in ('ab', 'cd');
create table rlp3efgh partition of rlp3 for values in ('ef', 'gh');
create table rlp3nullxy partition of rlp3 for values in (null, 'xy');
alter table rlp attach partition rlp3 for values from (15) to (20);
create table rlp4 partition of rlp for values from (20) to (30) partition by range (a);
create table rlp4_default partition of rlp4 default;
create table rlp4_1 partition of rlp4 for values from (20) to (25);
create table rlp4_2 partition of rlp4 for values from (25) to (29);
create table rlp5 partition of rlp for values from (31) to (maxvalue) partition by range (a);
create table rlp5_default partition of rlp5 default;
create table rlp5_1 partition of rlp5 for values from (31) to (40);
explain (costs off) select * from rlp where a < 1;
explain (costs off) select * from rlp where 1 > a; /* commuted */
explain (costs off) select * from rlp where a <= 1;
explain (costs off) select * from rlp where a = 1;
explain (costs off) select * from rlp where a = 1::bigint; /* same as above */
explain (costs off) select * from rlp where a = 1::numeric; /* no pruning */
explain (costs off) select * from rlp where a <= 10;
explain (costs off) select * from rlp where a > 10;
explain (costs off) select * from rlp where a < 15;
explain (costs off) select * from rlp where a <= 15;
explain (costs off) select * from rlp where a > 15 and b = 'ab';
explain (costs off) select * from rlp where a = 16;
explain (costs off) select * from rlp where a = 16 and b in ('not', 'in', 'here');
explain (costs off) select * from rlp where a = 16 and b < 'ab';
explain (costs off) select * from rlp where a = 16 and b <= 'ab';
explain (costs off) select * from rlp where a = 16 and b is null;
explain (costs off) select * from rlp where a = 16 and b is not null;
explain (costs off) select * from rlp where a is null;
explain (costs off) select * from rlp where a is not null;
explain (costs off) select * from rlp where a > 30;
explain (costs off) select * from rlp where a = 30; /* only default is scanned */
explain (costs off) select * from rlp where a <= 31;
explain (costs off) select * from rlp where a = 1 or a = 7;
explain (costs off) select * from rlp where a = 1 or b = 'ab';
explain (costs off) select * from rlp where a > 20 and a < 27;
explain (costs off) select * from rlp where a = 29;
explain (costs off) select * from rlp where a >= 29;
explain (costs off) select * from rlp where a < 1 or (a > 20 and a < 25);
-- where clause contradicts sub-partition's constraint
explain (costs off) select * from rlp where a = 20 or a = 40;
explain (costs off) select * from rlp3 where a = 20; /* empty */
-- redundant clauses are eliminated
explain (costs off) select * from rlp where a > 1 and a = 10; /* only default */
explain (costs off) select * from rlp where a > 1 and a >=15; /* rlp3 onwards, including default */
explain (costs off) select * from rlp where a = 1 and a = 3; /* empty */
explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = 15);
-- multi-column keys
create table mc3p (a int, b int, c int) partition by range (a, abs(b), c);
create table mc3p_default partition of mc3p default;
create table mc3p0 partition of mc3p for values from (minvalue, minvalue, minvalue) to (1, 1, 1);
create table mc3p1 partition of mc3p for values from (1, 1, 1) to (10, 5, 10);
create table mc3p2 partition of mc3p for values from (10, 5, 10) to (10, 10, 10);
create table mc3p3 partition of mc3p for values from (10, 10, 10) to (10, 10, 20);
create table mc3p4 partition of mc3p for values from (10, 10, 20) to (10, maxvalue, maxvalue);
create table mc3p5 partition of mc3p for values from (11, 1, 1) to (20, 10, 10);
create table mc3p6 partition of mc3p for values from (20, 10, 10) to (20, 20, 20);
create table mc3p7 partition of mc3p for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue);
explain (costs off) select * from mc3p where a = 1;
explain (costs off) select * from mc3p where a = 1 and abs(b) < 1;
explain (costs off) select * from mc3p where a = 1 and abs(b) = 1;
explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8;
explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35;
explain (costs off) select * from mc3p where a > 10;
explain (costs off) select * from mc3p where a >= 10;
explain (costs off) select * from mc3p where a < 10;
explain (costs off) select * from mc3p where a <= 10 and abs(b) < 10;
explain (costs off) select * from mc3p where a = 11 and abs(b) = 0;
explain (costs off) select * from mc3p where a = 20 and abs(b) = 10 and c = 100;
explain (costs off) select * from mc3p where a > 20;
explain (costs off) select * from mc3p where a >= 20;
explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or (a = 10 and abs(b) = 5 and c = 10) or (a > 11 and a < 20);
explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or (a = 10 and abs(b) = 5 and c = 10) or (a > 11 and a < 20) or a < 1;
explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or (a = 10 and abs(b) = 5 and c = 10) or (a > 11 and a < 20) or a < 1 or a = 1;
explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1;
explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 10);
explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 9);
-- a simpler multi-column keys case
create table mc2p (a int, b int) partition by range (a, b);
create table mc2p_default partition of mc2p default;
create table mc2p0 partition of mc2p for values from (minvalue, minvalue) to (1, minvalue);
create table mc2p1 partition of mc2p for values from (1, minvalue) to (1, 1);
create table mc2p2 partition of mc2p for values from (1, 1) to (2, minvalue);
create table mc2p3 partition of mc2p for values from (2, minvalue) to (2, 1);
create table mc2p4 partition of mc2p for values from (2, 1) to (2, maxvalue);
create table mc2p5 partition of mc2p for values from (2, maxvalue) to (maxvalue, maxvalue);
explain (costs off) select * from mc2p where a < 2;
explain (costs off) select * from mc2p where a = 2 and b < 1;
explain (costs off) select * from mc2p where a > 1;
explain (costs off) select * from mc2p where a = 1 and b > 1;
-- all partitions but the default one should be pruned
explain (costs off) select * from mc2p where a = 1 and b is null;
explain (costs off) select * from mc2p where a is null and b is null;
explain (costs off) select * from mc2p where a is null and b = 1;
explain (costs off) select * from mc2p where a is null;
explain (costs off) select * from mc2p where b is null;
-- boolean partitioning
create table boolpart (a bool) partition by list (a);
create table boolpart_default partition of boolpart default;
create table boolpart_t partition of boolpart for values in ('true');
create table boolpart_f partition of boolpart for values in ('false');
explain (costs off) select * from boolpart where a in (true, false);
explain (costs off) select * from boolpart where a = false;
explain (costs off) select * from boolpart where not a = false;
explain (costs off) select * from boolpart where a is true or a is not true;
explain (costs off) select * from boolpart where a is not true;
explain (costs off) select * from boolpart where a is not true and a is not false;
explain (costs off) select * from boolpart where a is unknown;
explain (costs off) select * from boolpart where a is not unknown;
create table boolrangep (a bool, b bool, c int) partition by range (a,b,c);
create table boolrangep_tf partition of boolrangep for values from ('true', 'false', 0) to ('true', 'false', 100);
create table boolrangep_ft partition of boolrangep for values from ('false', 'true', 0) to ('false', 'true', 100);
create table boolrangep_ff1 partition of boolrangep for values from ('false', 'false', 0) to ('false', 'false', 50);
create table boolrangep_ff2 partition of boolrangep for values from ('false', 'false', 50) to ('false', 'false', 100);
-- try a more complex case that's been known to trip up pruning in the past
explain (costs off) select * from boolrangep where not a and not b and c = 25;
-- test scalar-to-array operators
create table coercepart (a varchar) partition by list (a);
create table coercepart_ab partition of coercepart for values in ('ab');
create table coercepart_bc partition of coercepart for values in ('bc');
create table coercepart_cd partition of coercepart for values in ('cd');
explain (costs off) select * from coercepart where a in ('ab', to_char(125, '999'));
explain (costs off) select * from coercepart where a ~ any ('{ab}');
explain (costs off) select * from coercepart where a !~ all ('{ab}');
explain (costs off) select * from coercepart where a ~ any ('{ab,bc}');
explain (costs off) select * from coercepart where a !~ all ('{ab,bc}');
explain (costs off) select * from coercepart where a = any ('{ab,bc}');
explain (costs off) select * from coercepart where a = any ('{ab,null}');
explain (costs off) select * from coercepart where a = any (null::text[]);
explain (costs off) select * from coercepart where a = all ('{ab}');
explain (costs off) select * from coercepart where a = all ('{ab,bc}');
explain (costs off) select * from coercepart where a = all ('{ab,null}');
explain (costs off) select * from coercepart where a = all (null::text[]);
drop table coercepart;
CREATE TABLE part (a INT, b INT) PARTITION BY LIST (a);
CREATE TABLE part_p1 PARTITION OF part FOR VALUES IN (-2,-1,0,1,2);
CREATE TABLE part_p2 PARTITION OF part DEFAULT PARTITION BY RANGE(a);
CREATE TABLE part_p2_p1 PARTITION OF part_p2 DEFAULT;
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
6 years ago
CREATE TABLE part_rev (b INT, c INT, a INT);
ALTER TABLE part ATTACH PARTITION part_rev FOR VALUES IN (3); -- fail
ALTER TABLE part_rev DROP COLUMN c;
ALTER TABLE part ATTACH PARTITION part_rev FOR VALUES IN (3); -- now it's ok
INSERT INTO part VALUES (-1,-1), (1,1), (2,NULL), (NULL,-2),(NULL,NULL);
EXPLAIN (COSTS OFF) SELECT tableoid::regclass as part, a, b FROM part WHERE a IS NULL ORDER BY 1, 2, 3;
Fix EXPLAIN's column alias output for mismatched child tables. If an inheritance/partitioning parent table is assigned some column alias names in the query, EXPLAIN mapped those aliases onto the child tables' columns by physical position, resulting in bogus output if a child table's columns aren't one-for-one with the parent's. To fix, make expand_single_inheritance_child() generate a correctly re-mapped column alias list, rather than just copying the parent RTE's alias node. (We have to fill the alias field, not just adjust the eref field, because ruleutils.c will ignore eref in favor of looking at the real column names.) This means that child tables will now always have alias fields in plan rtables, where before they might not have. That results in a rather substantial set of regression test output changes: EXPLAIN will now always show child tables with aliases that match the parent table (usually with "_N" appended for uniqueness). But that seems like a net positive for understandability, since the parent alias corresponds to something that actually appeared in the original query, while the child table names didn't. (Note that this does not change anything for cases where an explicit table alias was written in the query for the parent table; it just makes cases without such aliases behave similarly to that.) Hence, while we could avoid these subsidiary changes if we made inherit.c more complicated, we choose not to. Discussion: https://postgr.es/m/12424.1575168015@sss.pgh.pa.us
6 years ago
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM part p(x) ORDER BY x;
--
-- some more cases
--
--
-- pruning for partitioned table appearing inside a sub-query
--
-- pruning won't work for mc3p, because some keys are Params
explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = t1.b and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1;
-- pruning should work fine, because values for a prefix of keys (a, b) are
-- available
explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1;
-- also here, because values for all keys are provided
explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1;
--
-- pruning with clauses containing <> operator
--
-- doesn't prune range partitions
create table rp (a int) partition by range (a);
create table rp0 partition of rp for values from (minvalue) to (1);
create table rp1 partition of rp for values from (1) to (2);
create table rp2 partition of rp for values from (2) to (maxvalue);
explain (costs off) select * from rp where a <> 1;
explain (costs off) select * from rp where a <> 1 and a <> 2;
-- null partition should be eliminated due to strict <> clause.
explain (costs off) select * from lp where a <> 'a';
-- ensure we detect contradictions in clauses; a can't be NULL and NOT NULL.
explain (costs off) select * from lp where a <> 'a' and a is null;
explain (costs off) select * from lp where (a <> 'a' and a <> 'd') or a is null;
-- check that it also works for a partitioned table that's not root,
-- which in this case are partitions of rlp that are themselves
-- list-partitioned on b
explain (costs off) select * from rlp where a = 15 and b <> 'ab' and b <> 'cd' and b <> 'xy' and b is not null;
--
-- different collations for different keys with same expression
--
create table coll_pruning_multi (a text) partition by range (substr(a, 1) collate "POSIX", substr(a, 1) collate "C");
create table coll_pruning_multi1 partition of coll_pruning_multi for values from ('a', 'a') to ('a', 'e');
create table coll_pruning_multi2 partition of coll_pruning_multi for values from ('a', 'e') to ('a', 'z');
create table coll_pruning_multi3 partition of coll_pruning_multi for values from ('b', 'a') to ('b', 'e');
-- no pruning, because no value for the leading key
explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C";
-- pruning, with a value provided for the leading key
explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'a' collate "POSIX";
-- pruning, with values provided for both keys
explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C" and substr(a, 1) = 'a' collate "POSIX";
--
-- LIKE operators don't prune
--
create table like_op_noprune (a text) partition by list (a);
create table like_op_noprune1 partition of like_op_noprune for values in ('ABC');
create table like_op_noprune2 partition of like_op_noprune for values in ('BCD');
explain (costs off) select * from like_op_noprune where a like '%BC';
--
-- tests wherein clause value requires a cross-type comparison function
--
create table lparted_by_int2 (a smallint) partition by list (a);
create table lparted_by_int2_1 partition of lparted_by_int2 for values in (1);
create table lparted_by_int2_16384 partition of lparted_by_int2 for values in (16384);
explain (costs off) select * from lparted_by_int2 where a = 100_000_000_000_000;
create table rparted_by_int2 (a smallint) partition by range (a);
create table rparted_by_int2_1 partition of rparted_by_int2 for values from (1) to (10);
create table rparted_by_int2_16384 partition of rparted_by_int2 for values from (10) to (16384);
-- all partitions pruned
explain (costs off) select * from rparted_by_int2 where a > 100_000_000_000_000;
create table rparted_by_int2_maxvalue partition of rparted_by_int2 for values from (16384) to (maxvalue);
-- all partitions but rparted_by_int2_maxvalue pruned
explain (costs off) select * from rparted_by_int2 where a > 100_000_000_000_000;
drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pruning_multi, like_op_noprune, lparted_by_int2, rparted_by_int2;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
--
-- Test Partition pruning for HASH partitioning
--
-- Use hand-rolled hash functions and operator classes to get predictable
-- result on different machines. See the definitions of
-- part_part_test_int4_ops and part_test_text_ops in insert.sql.
--
Fix hash partition pruning with asymmetric partition sets. perform_pruning_combine_step() was not taught about the number of partition indexes used in hash partitioning; more embarrassingly, get_matching_hash_bounds() also had it wrong. These errors are masked in the common case where all the partitions have the same modulus and no partition is missing. However, with missing or unequal-size partitions, we could erroneously prune some partitions that need to be scanned, leading to silently wrong query answers. While a minimal-footprint fix for this could be to export get_partition_bound_num_indexes and make the incorrect functions use it, I'm of the opinion that that function should never have existed in the first place. It's not reasonable data structure design that PartitionBoundInfoData lacks any explicit record of the length of its indexes[] array. Perhaps that was all right when it could always be assumed equal to ndatums, but something should have been done about it as soon as that stopped being true. Putting in an explicit "nindexes" field makes both partition_bounds_equal() and partition_bounds_copy() simpler, safer, and faster than before, and removes explicit knowledge of the number-of-partition-indexes rules from some other places too. This change also makes get_hash_partition_greatest_modulus obsolete. I left that in place in case any external code uses it, but no core code does anymore. Per bug #16840 from Michał Albrycht. Back-patch to v11 where the hash partitioning code came in. (In the back branches, add the new field at the end of PartitionBoundInfoData to minimize ABI risks.) Discussion: https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
5 years ago
create table hp (a int, b text, c int)
partition by hash (a part_test_int4_ops, b part_test_text_ops);
create table hp0 partition of hp for values with (modulus 4, remainder 0);
create table hp3 partition of hp for values with (modulus 4, remainder 3);
create table hp1 partition of hp for values with (modulus 4, remainder 1);
create table hp2 partition of hp for values with (modulus 4, remainder 2);
Fix hash partition pruning with asymmetric partition sets. perform_pruning_combine_step() was not taught about the number of partition indexes used in hash partitioning; more embarrassingly, get_matching_hash_bounds() also had it wrong. These errors are masked in the common case where all the partitions have the same modulus and no partition is missing. However, with missing or unequal-size partitions, we could erroneously prune some partitions that need to be scanned, leading to silently wrong query answers. While a minimal-footprint fix for this could be to export get_partition_bound_num_indexes and make the incorrect functions use it, I'm of the opinion that that function should never have existed in the first place. It's not reasonable data structure design that PartitionBoundInfoData lacks any explicit record of the length of its indexes[] array. Perhaps that was all right when it could always be assumed equal to ndatums, but something should have been done about it as soon as that stopped being true. Putting in an explicit "nindexes" field makes both partition_bounds_equal() and partition_bounds_copy() simpler, safer, and faster than before, and removes explicit knowledge of the number-of-partition-indexes rules from some other places too. This change also makes get_hash_partition_greatest_modulus obsolete. I left that in place in case any external code uses it, but no core code does anymore. Per bug #16840 from Michał Albrycht. Back-patch to v11 where the hash partitioning code came in. (In the back branches, add the new field at the end of PartitionBoundInfoData to minimize ABI risks.) Discussion: https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
5 years ago
insert into hp values (null, null, 0);
insert into hp values (1, null, 1);
insert into hp values (1, 'xxx', 2);
insert into hp values (null, 'xxx', 3);
insert into hp values (2, 'xxx', 4);
insert into hp values (1, 'abcde', 5);
select tableoid::regclass, * from hp order by c;
-- partial keys won't prune, nor would non-equality conditions
explain (costs off) select * from hp where a = 1;
explain (costs off) select * from hp where b = 'xxx';
explain (costs off) select * from hp where a is null;
explain (costs off) select * from hp where b is null;
explain (costs off) select * from hp where a < 1 and b = 'xxx';
explain (costs off) select * from hp where a <> 1 and b = 'yyy';
explain (costs off) select * from hp where a <> 1 and b <> 'xxx';
-- pruning should work if either a value or a IS NULL clause is provided for
-- each of the keys
explain (costs off) select * from hp where a is null and b is null;
explain (costs off) select * from hp where a = 1 and b is null;
explain (costs off) select * from hp where a = 1 and b = 'xxx';
explain (costs off) select * from hp where a is null and b = 'xxx';
explain (costs off) select * from hp where a = 2 and b = 'xxx';
explain (costs off) select * from hp where a = 1 and b = 'abcde';
explain (costs off) select * from hp where (a = 1 and b = 'abcde') or (a = 2 and b = 'xxx') or (a is null and b is null);
Fix hash partition pruning with asymmetric partition sets. perform_pruning_combine_step() was not taught about the number of partition indexes used in hash partitioning; more embarrassingly, get_matching_hash_bounds() also had it wrong. These errors are masked in the common case where all the partitions have the same modulus and no partition is missing. However, with missing or unequal-size partitions, we could erroneously prune some partitions that need to be scanned, leading to silently wrong query answers. While a minimal-footprint fix for this could be to export get_partition_bound_num_indexes and make the incorrect functions use it, I'm of the opinion that that function should never have existed in the first place. It's not reasonable data structure design that PartitionBoundInfoData lacks any explicit record of the length of its indexes[] array. Perhaps that was all right when it could always be assumed equal to ndatums, but something should have been done about it as soon as that stopped being true. Putting in an explicit "nindexes" field makes both partition_bounds_equal() and partition_bounds_copy() simpler, safer, and faster than before, and removes explicit knowledge of the number-of-partition-indexes rules from some other places too. This change also makes get_hash_partition_greatest_modulus obsolete. I left that in place in case any external code uses it, but no core code does anymore. Per bug #16840 from Michał Albrycht. Back-patch to v11 where the hash partitioning code came in. (In the back branches, add the new field at the end of PartitionBoundInfoData to minimize ABI risks.) Discussion: https://postgr.es/m/16840-571a22976f829ad4@postgresql.org
5 years ago
-- test pruning when not all the partitions exist
drop table hp1;
drop table hp3;
explain (costs off) select * from hp where a = 1 and b = 'abcde';
explain (costs off) select * from hp where a = 1 and b = 'abcde' and
(c = 2 or c = 3);
drop table hp2;
explain (costs off) select * from hp where a = 1 and b = 'abcde' and
(c = 2 or c = 3);
drop table hp;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
--
-- Test runtime partition pruning
--
create table ab (a int not null, b int not null) partition by list (a);
create table ab_a2 partition of ab for values in(2) partition by list (b);
create table ab_a2_b1 partition of ab_a2 for values in (1);
create table ab_a2_b2 partition of ab_a2 for values in (2);
create table ab_a2_b3 partition of ab_a2 for values in (3);
create table ab_a1 partition of ab for values in(1) partition by list (b);
create table ab_a1_b1 partition of ab_a1 for values in (1);
create table ab_a1_b2 partition of ab_a1 for values in (2);
create table ab_a1_b3 partition of ab_a1 for values in (3);
create table ab_a3 partition of ab for values in(3) partition by list (b);
create table ab_a3_b1 partition of ab_a3 for values in (1);
create table ab_a3_b2 partition of ab_a3 for values in (2);
create table ab_a3_b3 partition of ab_a3 for values in (3);
-- Disallow index only scans as concurrent transactions may stop visibility
-- bits being set causing "Heap Fetches" to be unstable in the EXPLAIN ANALYZE
-- output.
set enable_indexonlyscan = off;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
prepare ab_q1 (int, int, int) as
select * from ab where a between $1 and $2 and b <= $3;
explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2, 3);
explain (analyze, costs off, summary off, timing off) execute ab_q1 (1, 2, 3);
deallocate ab_q1;
-- Runtime pruning after optimizer pruning
prepare ab_q1 (int, int) as
select a from ab where a between $1 and $2 and b < 3;
explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2);
explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 4);
-- Ensure a mix of PARAM_EXTERN and PARAM_EXEC Params work together at
-- different levels of partitioning.
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
prepare ab_q2 (int, int) as
select a from ab where a between $1 and $2 and b < (select 3);
explain (analyze, costs off, summary off, timing off) execute ab_q2 (2, 2);
-- As above, but swap the PARAM_EXEC Param to the first partition level
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
prepare ab_q3 (int, int) as
select a from ab where b between $1 and $2 and a < (select 3);
explain (analyze, costs off, summary off, timing off) execute ab_q3 (2, 2);
-- Test a backwards Append scan
create table list_part (a int) partition by list (a);
create table list_part1 partition of list_part for values in (1);
create table list_part2 partition of list_part for values in (2);
create table list_part3 partition of list_part for values in (3);
create table list_part4 partition of list_part for values in (4);
insert into list_part select generate_series(1,4);
begin;
-- Don't select an actual value out of the table as the order of the Append's
-- subnodes may not be stable.
declare cur SCROLL CURSOR for select 1 from list_part where a > (select 1) and a < (select 4);
-- move beyond the final row
move 3 from cur;
-- Ensure we get two rows.
fetch backward all from cur;
commit;
begin;
-- Test run-time pruning using stable functions
create function list_part_fn(int) returns int as $$ begin return $1; end;$$ language plpgsql stable;
-- Ensure pruning works using a stable function containing no Vars
explain (analyze, costs off, summary off, timing off) select * from list_part where a = list_part_fn(1);
-- Ensure pruning does not take place when the function has a Var parameter
explain (analyze, costs off, summary off, timing off) select * from list_part where a = list_part_fn(a);
-- Ensure pruning does not take place when the expression contains a Var.
explain (analyze, costs off, summary off, timing off) select * from list_part where a = list_part_fn(1) + a;
rollback;
drop table list_part;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Parallel append
-- Parallel queries won't necessarily get as many workers as the planner
-- asked for. This affects not only the "Workers Launched:" field of EXPLAIN
-- results, but also row counts and loop counts for parallel scans, Gathers,
-- and everything in between. This function filters out the values we can't
-- rely on to be stable.
-- This removes enough info that you might wonder why bother with EXPLAIN
-- ANALYZE at all. The answer is that we need to see '(never executed)'
-- notations because that's the only way to verify runtime pruning.
create function explain_parallel_append(text) returns setof text
language plpgsql as
$$
declare
ln text;
begin
for ln in
execute format('explain (analyze, costs off, summary off, timing off) %s',
$1)
loop
ln := regexp_replace(ln, 'Workers Launched: \d+', 'Workers Launched: N');
ln := regexp_replace(ln, 'actual rows=\d+ loops=\d+', 'actual rows=N loops=N');
ln := regexp_replace(ln, 'Rows Removed by Filter: \d+', 'Rows Removed by Filter: N');
return next ln;
end loop;
end;
$$;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
prepare ab_q4 (int, int) as
select avg(a) from ab where a between $1 and $2 and b < 4;
-- Encourage use of parallel plans
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set min_parallel_table_scan_size = 0;
set max_parallel_workers_per_gather = 2;
select explain_parallel_append('execute ab_q4 (2, 2)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Test run-time pruning with IN lists.
prepare ab_q5 (int, int, int) as
select avg(a) from ab where a in($1,$2,$3) and b < 4;
select explain_parallel_append('execute ab_q5 (1, 1, 1)');
select explain_parallel_append('execute ab_q5 (2, 3, 3)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Try some params whose values do not belong to any partition.
select explain_parallel_append('execute ab_q5 (33, 44, 55)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Test Parallel Append with PARAM_EXEC Params
select explain_parallel_append('select count(*) from ab where (a = (select 1) or a = (select 3)) and b = 2');
-- Test pruning during parallel nested loop query
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
create table lprt_a (a int not null);
-- Insert some values we won't find in ab
insert into lprt_a select 0 from generate_series(1,100);
-- and insert some values that we should find.
insert into lprt_a values(1),(1);
analyze lprt_a;
create index ab_a2_b1_a_idx on ab_a2_b1 (a);
create index ab_a2_b2_a_idx on ab_a2_b2 (a);
create index ab_a2_b3_a_idx on ab_a2_b3 (a);
create index ab_a1_b1_a_idx on ab_a1_b1 (a);
create index ab_a1_b2_a_idx on ab_a1_b2 (a);
create index ab_a1_b3_a_idx on ab_a1_b3 (a);
create index ab_a3_b1_a_idx on ab_a3_b1 (a);
create index ab_a3_b2_a_idx on ab_a3_b2 (a);
create index ab_a3_b3_a_idx on ab_a3_b3 (a);
set enable_hashjoin = 0;
set enable_mergejoin = 0;
set enable_memoize = 0;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(0, 0, 1)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Ensure the same partitions are pruned when we make the nested loop
-- parameter an Expr rather than a plain Param.
select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a + 0 where a.a in(0, 0, 1)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
insert into lprt_a values(3),(3);
select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 3)');
select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
delete from lprt_a where a = 1;
select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)');
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
reset enable_hashjoin;
reset enable_mergejoin;
reset enable_memoize;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
reset parallel_setup_cost;
reset parallel_tuple_cost;
reset min_parallel_table_scan_size;
reset max_parallel_workers_per_gather;
-- Test run-time partition pruning with an initplan
explain (analyze, costs off, summary off, timing off)
select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a);
-- Test run-time partition pruning with UNION ALL parents
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1);
-- A case containing a UNION ALL with a non-partitioned child.
explain (analyze, costs off, summary off, timing off)
select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1);
-- Another UNION ALL test, but containing a mix of exec init and exec run-time pruning.
create table xy_1 (x int, y int);
insert into xy_1 values(100,-10);
set enable_bitmapscan = 0;
set enable_indexscan = 0;
prepare ab_q6 as
select * from (
select tableoid::regclass,a,b from ab
union all
select tableoid::regclass,x,y from xy_1
union all
select tableoid::regclass,a,b from ab
) ab where a = $1 and b = (select -10);
-- Ensure the xy_1 subplan is not pruned.
explain (analyze, costs off, summary off, timing off) execute ab_q6(1);
-- Ensure we see just the xy_1 row.
execute ab_q6(100);
reset enable_bitmapscan;
reset enable_indexscan;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
deallocate ab_q1;
deallocate ab_q2;
deallocate ab_q3;
deallocate ab_q4;
deallocate ab_q5;
deallocate ab_q6;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- UPDATE on a partition subtree has been seen to have problems.
insert into ab values (1,2);
explain (analyze, costs off, summary off, timing off)
update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
table ab;
-- Test UPDATE where source relation has run-time pruning enabled
truncate ab;
insert into ab values (1, 1), (1, 2), (1, 3), (2, 1);
explain (analyze, costs off, summary off, timing off)
update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
select tableoid::regclass, * from ab;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
drop table ab, lprt_a;
-- Join
create table tbl1(col1 int);
insert into tbl1 values (501), (505);
-- Basic table
create table tprt (col1 int) partition by range (col1);
create table tprt_1 partition of tprt for values from (1) to (501);
create table tprt_2 partition of tprt for values from (501) to (1001);
create table tprt_3 partition of tprt for values from (1001) to (2001);
create table tprt_4 partition of tprt for values from (2001) to (3001);
create table tprt_5 partition of tprt for values from (3001) to (4001);
create table tprt_6 partition of tprt for values from (4001) to (5001);
create index tprt1_idx on tprt_1 (col1);
create index tprt2_idx on tprt_2 (col1);
create index tprt3_idx on tprt_3 (col1);
create index tprt4_idx on tprt_4 (col1);
create index tprt5_idx on tprt_5 (col1);
create index tprt6_idx on tprt_6 (col1);
insert into tprt values (10), (20), (501), (502), (505), (1001), (4500);
set enable_hashjoin = off;
set enable_mergejoin = off;
explain (analyze, costs off, summary off, timing off)
select * from tbl1 join tprt on tbl1.col1 > tprt.col1;
explain (analyze, costs off, summary off, timing off)
select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 > tprt.col1
order by tbl1.col1, tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 = tprt.col1
order by tbl1.col1, tprt.col1;
-- Multiple partitions
insert into tbl1 values (1001), (1010), (1011);
explain (analyze, costs off, summary off, timing off)
select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1;
explain (analyze, costs off, summary off, timing off)
select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 > tprt.col1
order by tbl1.col1, tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 = tprt.col1
order by tbl1.col1, tprt.col1;
-- Last partition
delete from tbl1;
insert into tbl1 values (4400);
explain (analyze, costs off, summary off, timing off)
select * from tbl1 join tprt on tbl1.col1 < tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 < tprt.col1
order by tbl1.col1, tprt.col1;
-- No matching partition
delete from tbl1;
insert into tbl1 values (10000);
explain (analyze, costs off, summary off, timing off)
select * from tbl1 join tprt on tbl1.col1 = tprt.col1;
select tbl1.col1, tprt.col1 from tbl1
inner join tprt on tbl1.col1 = tprt.col1
order by tbl1.col1, tprt.col1;
drop table tbl1, tprt;
-- Test with columns defined in varying orders between each level
create table part_abc (a int not null, b int not null, c int not null) partition by list (a);
create table part_bac (b int not null, a int not null, c int not null) partition by list (b);
create table part_cab (c int not null, a int not null, b int not null) partition by list (c);
create table part_abc_p1 (a int not null, b int not null, c int not null);
alter table part_abc attach partition part_bac for values in(1);
alter table part_bac attach partition part_cab for values in(2);
alter table part_cab attach partition part_abc_p1 for values in(3);
prepare part_abc_q1 (int, int, int) as
select * from part_abc where a = $1 and b = $2 and c = $3;
-- Single partition should be scanned.
explain (analyze, costs off, summary off, timing off) execute part_abc_q1 (1, 2, 3);
deallocate part_abc_q1;
drop table part_abc;
-- Ensure that an Append node properly handles a sub-partitioned table
-- matching without any of its leaf partitions matching the clause.
create table listp (a int, b int) partition by list (a);
create table listp_1 partition of listp for values in(1) partition by list (b);
create table listp_1_1 partition of listp_1 for values in(1);
create table listp_2 partition of listp for values in(2) partition by list (b);
create table listp_2_1 partition of listp_2 for values in(2);
select * from listp where b = 1;
-- Ensure that an Append node properly can handle selection of all first level
-- partitions before finally detecting the correct set of 2nd level partitions
-- which match the given parameter.
prepare q1 (int,int) as select * from listp where b in ($1,$2);
explain (analyze, costs off, summary off, timing off) execute q1 (1,1);
explain (analyze, costs off, summary off, timing off) execute q1 (2,2);
-- Try with no matching partitions.
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
explain (analyze, costs off, summary off, timing off) execute q1 (0,0);
deallocate q1;
-- Test more complex cases where a not-equal condition further eliminates partitions.
prepare q1 (int,int,int,int) as select * from listp where b in($1,$2) and $3 <> b and $4 <> b;
-- Both partitions allowed by IN clause, but one disallowed by <> clause
explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,0);
-- Both partitions allowed by IN clause, then both excluded again by <> clauses.
explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,1);
-- Ensure Params that evaluate to NULL properly prune away all partitions
explain (analyze, costs off, summary off, timing off)
select * from listp where a = (select null::int);
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
drop table listp;
--
-- check that stable query clauses are only used in run-time pruning
--
create table stable_qual_pruning (a timestamp) partition by range (a);
create table stable_qual_pruning1 partition of stable_qual_pruning
for values from ('2000-01-01') to ('2000-02-01');
create table stable_qual_pruning2 partition of stable_qual_pruning
for values from ('2000-02-01') to ('2000-03-01');
create table stable_qual_pruning3 partition of stable_qual_pruning
for values from ('3000-02-01') to ('3000-03-01');
-- comparison against a stable value requires run-time pruning
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning where a < localtimestamp;
-- timestamp < timestamptz comparison is only stable, not immutable
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning where a < '2000-02-01'::timestamptz;
-- check ScalarArrayOp cases
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(array['2010-02-01', '2020-01-01']::timestamp[]);
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(array['2000-02-01', '2010-01-01']::timestamp[]);
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(array['2000-02-01', localtimestamp]::timestamp[]);
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(array['2010-02-01', '2020-01-01']::timestamptz[]);
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(array['2000-02-01', '2010-01-01']::timestamptz[]);
explain (analyze, costs off, summary off, timing off)
select * from stable_qual_pruning
where a = any(null::timestamptz[]);
drop table stable_qual_pruning;
--
-- Check that pruning with composite range partitioning works correctly when
-- it must ignore clauses for trailing keys once it has seen a clause with
-- non-inclusive operator for an earlier key
--
create table mc3p (a int, b int, c int) partition by range (a, abs(b), c);
create table mc3p0 partition of mc3p
for values from (0, 0, 0) to (0, maxvalue, maxvalue);
create table mc3p1 partition of mc3p
for values from (1, 1, 1) to (2, minvalue, minvalue);
create table mc3p2 partition of mc3p
for values from (2, minvalue, minvalue) to (3, maxvalue, maxvalue);
insert into mc3p values (0, 1, 1), (1, 1, 1), (2, 1, 1);
explain (analyze, costs off, summary off, timing off)
select * from mc3p where a < 3 and abs(b) = 1;
Restructure creation of run-time pruning steps. Previously, gen_partprune_steps() always built executor pruning steps using all suitable clauses, including those containing PARAM_EXEC Params. This meant that the pruning steps were only completely safe for executor run-time (scan start) pruning. To prune at executor startup, we had to ignore the steps involving exec Params. But this doesn't really work in general, since there may be logic changes needed as well --- for example, pruning according to the last operator's btree strategy is the wrong thing if we're not applying that operator. The rules embodied in gen_partprune_steps() and its minions are sufficiently complicated that tracking their incremental effects in other logic seems quite impractical. Short of a complete redesign, the only safe fix seems to be to run gen_partprune_steps() twice, once to create executor startup pruning steps and then again for run-time pruning steps. We can save a few cycles however by noting during the first scan whether we rejected any clauses because they involved exec Params --- if not, we don't need to do the second scan. In support of this, refactor the internal APIs in partprune.c to make more use of passing information in the GeneratePruningStepsContext struct, rather than as separate arguments. This is, I hope, the last piece of our response to a bug report from Alan Jackson. Back-patch to v11 where this code came in. Discussion: https://postgr.es/m/FAD28A83-AC73-489E-A058-2681FA31D648@tvsquared.com
6 years ago
--
-- Check that pruning with composite range partitioning works correctly when
-- a combination of runtime parameters is specified, not all of whose values
-- are available at the same time
--
prepare ps1 as
select * from mc3p where a = $1 and abs(b) < (select 3);
explain (analyze, costs off, summary off, timing off)
execute ps1(1);
deallocate ps1;
prepare ps2 as
select * from mc3p where a <= $1 and abs(b) < (select 3);
explain (analyze, costs off, summary off, timing off)
execute ps2(1);
deallocate ps2;
drop table mc3p;
Support partition pruning at execution time Existing partition pruning is only able to work at plan time, for query quals that appear in the parsed query. This is good but limiting, as there can be parameters that appear later that can be usefully used to further prune partitions. This commit adds support for pruning subnodes of Append which cannot possibly contain any matching tuples, during execution, by evaluating Params to determine the minimum set of subnodes that can possibly match. We support more than just simple Params in WHERE clauses. Support additionally includes: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Partition pruning is performed in two ways. When Params external to the plan are found to match the partition key we attempt to prune away unneeded Append subplans during the initialization of the executor. This allows us to bypass the initialization of non-matching subplans meaning they won't appear in the EXPLAIN or EXPLAIN ANALYZE output. For parameters whose value is only known during the actual execution then the pruning of these subplans must wait. Subplans which are eliminated during this stage of pruning are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE must be viewed. If a certain Append subplan was never executed due to the elimination of the partition then the execution timing area will state "(never executed)". Whereas, if, for example in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than others due to the subplan having been scanned fewer times. This is due to the list of matching subnodes having to be evaluated whenever a parameter which was found to match the partition key changes. This commit required some additional infrastructure that permits the building of a data structure which is able to perform the translation of the matching partition IDs, as returned by get_matching_partitions, into the list index of a subpaths list, as exist in node types such as Append, MergeAppend and ModifyTable. This allows us to translate a list of clauses into a Bitmapset of all the subpath indexes which must be included to satisfy the clause list. Author: David Rowley, based on an earlier effort by Beena Emerson Reviewers: Amit Langote, Robert Haas, Amul Sul, Rajkumar Raghuwanshi, Jesper Pedersen Discussion: https://postgr.es/m/CAOG9ApE16ac-_VVZVvv0gePSgkg_BwYEV1NBqZFqDR2bBE0X0A@mail.gmail.com
8 years ago
-- Ensure runtime pruning works with initplans params with boolean types
create table boolvalues (value bool not null);
insert into boolvalues values('t'),('f');
create table boolp (a bool) partition by list (a);
create table boolp_t partition of boolp for values in('t');
create table boolp_f partition of boolp for values in('f');
explain (analyze, costs off, summary off, timing off)
select * from boolp where a = (select value from boolvalues where value);
explain (analyze, costs off, summary off, timing off)
select * from boolp where a = (select value from boolvalues where not value);
drop table boolp;
--
-- Test run-time pruning of MergeAppend subnodes
--
set enable_seqscan = off;
set enable_sort = off;
Use Append rather than MergeAppend for scanning ordered partitions. If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
7 years ago
create table ma_test (a int, b int) partition by range (a);
create table ma_test_p1 partition of ma_test for values from (0) to (10);
create table ma_test_p2 partition of ma_test for values from (10) to (20);
create table ma_test_p3 partition of ma_test for values from (20) to (30);
Use Append rather than MergeAppend for scanning ordered partitions. If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
7 years ago
insert into ma_test select x,x from generate_series(0,29) t(x);
create index on ma_test (b);
analyze ma_test;
Use Append rather than MergeAppend for scanning ordered partitions. If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
7 years ago
prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b;
explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
execute mt_q1(15);
explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
execute mt_q1(25);
-- Ensure MergeAppend behaves correctly when no subplans match
explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
execute mt_q1(35);
deallocate mt_q1;
prepare mt_q2 (int) as select * from ma_test where a >= $1 order by b limit 1;
-- Ensure output list looks sane when the MergeAppend has no subplans.
explain (analyze, verbose, costs off, summary off, timing off) execute mt_q2 (35);
deallocate mt_q2;
-- ensure initplan params properly prune partitions
Use Append rather than MergeAppend for scanning ordered partitions. If we need ordered output from a scan of a partitioned table, but the ordering matches the partition ordering, then we don't need to use a MergeAppend to combine the pre-ordered per-partition scan results: a plain Append will produce the same results. This both saves useless comparison work inside the MergeAppend proper, and allows us to start returning tuples after istarting up just the first child node not all of them. However, all is not peaches and cream, because if some of the child nodes have high startup costs then there will be big discontinuities in the tuples-returned-versus-elapsed-time curve. The planner's cost model cannot handle that (yet, anyway). If we model the Append's startup cost as being just the first child's startup cost, we may drastically underestimate the cost of fetching slightly more tuples than are available from the first child. Since we've had bad experiences with over-optimistic choices of "fast start" plans for ORDER BY LIMIT queries, that seems scary. As a klugy workaround, set the startup cost estimate for an ordered Append to be the sum of its children's startup costs (as MergeAppend would). This doesn't really describe reality, but it's less likely to cause a bad plan choice than an underestimated startup cost would. In practice, the cases where we really care about this optimization will have child plans that are IndexScans with zero startup cost, so that the overly conservative estimate is still just zero. David Rowley, reviewed by Julien Rouhaud and Antonin Houska Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
7 years ago
explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b;
reset enable_seqscan;
reset enable_sort;
drop table ma_test;
reset enable_indexonlyscan;
--
-- check that pruning works properly when the partition key is of a
-- pseudotype
--
-- array type list partition key
create table pp_arrpart (a int[]) partition by list (a);
create table pp_arrpart1 partition of pp_arrpart for values in ('{1}');
create table pp_arrpart2 partition of pp_arrpart for values in ('{2, 3}', '{4, 5}');
explain (costs off) select * from pp_arrpart where a = '{1}';
explain (costs off) select * from pp_arrpart where a = '{1, 2}';
explain (costs off) select * from pp_arrpart where a in ('{4, 5}', '{1}');
explain (costs off) update pp_arrpart set a = a where a = '{1}';
explain (costs off) delete from pp_arrpart where a = '{1}';
drop table pp_arrpart;
-- array type hash partition key
create table pph_arrpart (a int[]) partition by hash (a);
create table pph_arrpart1 partition of pph_arrpart for values with (modulus 2, remainder 0);
create table pph_arrpart2 partition of pph_arrpart for values with (modulus 2, remainder 1);
insert into pph_arrpart values ('{1}'), ('{1, 2}'), ('{4, 5}');
select tableoid::regclass, * from pph_arrpart order by 1;
explain (costs off) select * from pph_arrpart where a = '{1}';
explain (costs off) select * from pph_arrpart where a = '{1, 2}';
explain (costs off) select * from pph_arrpart where a in ('{4, 5}', '{1}');
drop table pph_arrpart;
-- enum type list partition key
create type pp_colors as enum ('green', 'blue', 'black');
create table pp_enumpart (a pp_colors) partition by list (a);
create table pp_enumpart_green partition of pp_enumpart for values in ('green');
create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
explain (costs off) select * from pp_enumpart where a = 'blue';
explain (costs off) select * from pp_enumpart where a = 'black';
drop table pp_enumpart;
drop type pp_colors;
-- record type as partition key
create type pp_rectype as (a int, b int);
create table pp_recpart (a pp_rectype) partition by list (a);
create table pp_recpart_11 partition of pp_recpart for values in ('(1,1)');
create table pp_recpart_23 partition of pp_recpart for values in ('(2,3)');
explain (costs off) select * from pp_recpart where a = '(1,1)'::pp_rectype;
explain (costs off) select * from pp_recpart where a = '(1,2)'::pp_rectype;
drop table pp_recpart;
drop type pp_rectype;
-- range type partition key
create table pp_intrangepart (a int4range) partition by list (a);
create table pp_intrangepart12 partition of pp_intrangepart for values in ('[1,2]');
create table pp_intrangepart2inf partition of pp_intrangepart for values in ('[2,)');
explain (costs off) select * from pp_intrangepart where a = '[1,2]'::int4range;
explain (costs off) select * from pp_intrangepart where a = '(1,2)'::int4range;
drop table pp_intrangepart;
--
-- Ensure the enable_partition_prune GUC properly disables partition pruning.
--
create table pp_lp (a int, value int) partition by list (a);
create table pp_lp1 partition of pp_lp for values in(1);
create table pp_lp2 partition of pp_lp for values in(2);
explain (costs off) select * from pp_lp where a = 1;
explain (costs off) update pp_lp set value = 10 where a = 1;
explain (costs off) delete from pp_lp where a = 1;
set enable_partition_pruning = off;
set constraint_exclusion = 'partition'; -- this should not affect the result.
explain (costs off) select * from pp_lp where a = 1;
explain (costs off) update pp_lp set value = 10 where a = 1;
explain (costs off) delete from pp_lp where a = 1;
set constraint_exclusion = 'off'; -- this should not affect the result.
explain (costs off) select * from pp_lp where a = 1;
explain (costs off) update pp_lp set value = 10 where a = 1;
explain (costs off) delete from pp_lp where a = 1;
drop table pp_lp;
-- Ensure enable_partition_prune does not affect non-partitioned tables.
create table inh_lp (a int, value int);
create table inh_lp1 (a int, value int, check(a = 1)) inherits (inh_lp);
create table inh_lp2 (a int, value int, check(a = 2)) inherits (inh_lp);
set constraint_exclusion = 'partition';
-- inh_lp2 should be removed in the following 3 cases.
explain (costs off) select * from inh_lp where a = 1;
explain (costs off) update inh_lp set value = 10 where a = 1;
explain (costs off) delete from inh_lp where a = 1;
-- Ensure we don't exclude normal relations when we only expect to exclude
-- inheritance children
explain (costs off) update inh_lp1 set value = 10 where a = 2;
drop table inh_lp cascade;
reset enable_partition_pruning;
reset constraint_exclusion;
Clarify use of temporary tables within partition trees Since their introduction, partition trees have been a bit lossy regarding temporary relations. Inheritance trees respect the following patterns: 1) a child relation can be temporary if the parent is permanent. 2) a child relation can be temporary if the parent is temporary. 3) a child relation cannot be permanent if the parent is temporary. 4) The use of temporary relations also imply that when both parent and child need to be from the same sessions. Partitions share many similar patterns with inheritance, however the handling of the partition bounds make the situation a bit tricky for case 1) as the partition code bases a lot of its lookup code upon PartitionDesc which does not really look after relpersistence. This causes for example a temporary partition created by session A to be visible by another session B, preventing this session B to create an extra partition which overlaps with the temporary one created by A with a non-intuitive error message. There could be use-cases where mixing permanent partitioned tables with temporary partitions make sense, but that would be a new feature. Partitions respect 2), 3) and 4) already. It is a bit depressing to see those error checks happening in MergeAttributes() whose purpose is different, but that's left as future refactoring work. Back-patch down to 10, which is where partitioning has been introduced, except that default partitions do not apply there. Documentation also includes limitations related to the use of temporary tables with partition trees. Reported-by: David Rowley Author: Amit Langote, Michael Paquier Reviewed-by: Ashutosh Bapat, Amit Langote, Michael Paquier Discussion: https://postgr.es/m/CAKJS1f94Ojk0og9GMkRHGt8wHTW=ijq5KzJKuoBoqWLwSVwGmw@mail.gmail.com
7 years ago
-- Check pruning for a partition tree containing only temporary relations
create temp table pp_temp_parent (a int) partition by list (a);
create temp table pp_temp_part_1 partition of pp_temp_parent for values in (1);
create temp table pp_temp_part_def partition of pp_temp_parent default;
explain (costs off) select * from pp_temp_parent where true;
explain (costs off) select * from pp_temp_parent where a = 2;
drop table pp_temp_parent;
-- Stress run-time partition pruning a bit more, per bug reports
create temp table p (a int, b int, c int) partition by list (a);
create temp table p1 partition of p for values in (1);
create temp table p2 partition of p for values in (2);
create temp table q (a int, b int, c int) partition by list (a);
create temp table q1 partition of q for values in (1) partition by list (b);
create temp table q11 partition of q1 for values in (1) partition by list (c);
create temp table q111 partition of q11 for values in (1);
create temp table q2 partition of q for values in (2) partition by list (b);
create temp table q21 partition of q2 for values in (1);
create temp table q22 partition of q2 for values in (2);
insert into q22 values (2, 2, 3);
explain (costs off)
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = 1 and s.b = 1 and s.c = (select 1);
prepare q (int, int) as
select *
from (
select * from p
union all
select * from q1
union all
select 1, 1, 1
) s(a, b, c)
where s.a = $1 and s.b = $2 and s.c = (select 1);
explain (costs off) execute q (1, 1);
execute q (1, 1);
drop table p, q;
-- Ensure run-time pruning works correctly when we match a partitioned table
-- on the first level but find no matching partitions on the second level.
create table listp (a int, b int) partition by list (a);
create table listp1 partition of listp for values in(1);
create table listp2 partition of listp for values in(2) partition by list(b);
create table listp2_10 partition of listp2 for values in (10);
explain (analyze, costs off, summary off, timing off)
select * from listp where a = (select 2) and b <> 10;
Clean up handling of constraint_exclusion and enable_partition_pruning. The interaction of these parameters was a bit confused/confusing, and in fact v11 entirely misses the opportunity to apply partition constraints when a partition is accessed directly (rather than indirectly from its parent). In HEAD, establish the principle that enable_partition_pruning controls partition pruning and nothing else. When accessing a partition via its parent, we do partition pruning (if enabled by enable_partition_pruning) and then there is no need to consider partition constraints in the constraint_exclusion logic. When accessing a partition directly, its partition constraints are applied by the constraint_exclusion logic, only if constraint_exclusion = on. In v11, we can't have such a clean division of these GUCs' effects, partly because we don't want to break compatibility too much in a released branch, and partly because the clean coding requires inheritance_planner to have applied partition pruning to a partitioned target table, which it doesn't in v11. However, we can tweak things enough to cover the missed case, which seems like a good idea since it's potentially a performance regression from v10. This patch keeps v11's previous behavior in which enable_partition_pruning overrides constraint_exclusion for an inherited target table, though. In HEAD, also teach relation_excluded_by_constraints that it's okay to use inheritable constraints when trying to prune a traditional inheritance tree. This might not be thought worthy of effort given that that feature is semi-deprecated now, but we have enough infrastructure that it only takes a couple more lines of code to do it correctly. Amit Langote and Tom Lane Discussion: https://postgr.es/m/9813f079-f16b-61c8-9ab7-4363cab28d80@lab.ntt.co.jp Discussion: https://postgr.es/m/29069.1555970894@sss.pgh.pa.us
6 years ago
--
-- check that a partition directly accessed in a query is excluded with
-- constraint_exclusion = on
--
-- turn off partition pruning, so that it doesn't interfere
set enable_partition_pruning to off;
-- setting constraint_exclusion to 'partition' disables exclusion
set constraint_exclusion to 'partition';
explain (costs off) select * from listp1 where a = 2;
explain (costs off) update listp1 set a = 1 where a = 2;
-- constraint exclusion enabled
set constraint_exclusion to 'on';
explain (costs off) select * from listp1 where a = 2;
explain (costs off) update listp1 set a = 1 where a = 2;
reset constraint_exclusion;
reset enable_partition_pruning;
drop table listp;
Fix some issues with step generation in partition pruning. In the case of range partitioning, get_steps_using_prefix() assumes that the passed-in prefix list contains at least one clause for each of the partition keys earlier than one specified in the passed-in step_lastkeyno, but the caller (ie, gen_prune_steps_from_opexps()) didn't take it into account, which led to a server crash or incorrect results when the list contained no clauses for such partition keys, as reported in bug #16500 and #16501 from Kobayashi Hisanori. Update the caller to call that function only when the list created there contains at least one clause for each of the earlier partition keys in the case of range partitioning. While at it, fix some other issues: * The list to pass to get_steps_using_prefix() is allowed to contain multiple clauses for the same partition key, as described in the comment for that function, but that function actually assumed that the list contained just a single clause for each of middle partition keys, which led to an assertion failure when the list contained multiple clauses for such partition keys. Update that function to match the comment. * In the case of hash partitioning, partition keys are allowed to be NULL, in which case the list to pass to get_steps_using_prefix() contains no clauses for NULL partition keys, but that function treats that case as like the case of range partitioning, which led to the assertion failure. Update the assertion test to take into account NULL partition keys in the case of hash partitioning. * Fix a typo in a comment in get_steps_using_prefix_recurse(). * gen_partprune_steps() failed to detect self-contradiction from strict-qual clauses and an IS NULL clause for the same partition key in some cases, producing incorrect partition-pruning steps, which led to incorrect results of partition pruning, but didn't cause any user-visible problems fortunately, as the self-contradiction is detected later in the query planning. Update that function to detect the self-contradiction. Per bug #16500 and #16501 from Kobayashi Hisanori. Patch by me, initial diagnosis for the reported issue and review by Dmitry Dolgov. Back-patch to v11, where partition pruning was introduced. Discussion: https://postgr.es/m/16500-d1613f2a78e1e090%40postgresql.org Discussion: https://postgr.es/m/16501-5234a9a0394f6754%40postgresql.org
5 years ago
Allow run-time pruning on nested Append/MergeAppend nodes Previously we only tagged on the required information to allow the executor to perform run-time partition pruning for Append/MergeAppend nodes belonging to base relations. It was thought that nested Append/MergeAppend nodes were just about always pulled up into the top-level Append/MergeAppend and that making the run-time pruning info for any sub Append/MergeAppend nodes was a waste of time. However, that was likely badly thought through. Some examples of cases we're unable to pullup nested Append/MergeAppends are: 1) Parallel Append nodes with a mix of parallel and non-parallel paths into a Parallel Append. 2) When planning an ordered Append scan a sub-partition which is unordered may require a nested MergeAppend path to ensure sub-partitions don't mix up the order of tuples being fed into the top-level Append. Unfortunately, it was not just as simple as removing the lines in createplan.c which were purposefully not building the run-time pruning info for anything but RELOPT_BASEREL relations. The code in add_paths_to_append_rel() was far too sloppy about which partitioned_rels it included for the Append/MergeAppend paths. The original code there would always assume accumulate_append_subpath() would pull each sub-Append and sub-MergeAppend path into the top-level path. While it does not appear that there were any actual bugs caused by having the additional partitioned table RT indexes recorded, what it did mean is that later in planning, when we built the run-time pruning info that we wasted effort and built PartitionedRelPruneInfos for partitioned tables that we had no subpaths for the executor to run-time prune. Here we tighten that up so that partitioned_rels only ever contains the RT index for partitioned tables which actually have subpaths in the given Append/MergeAppend. We can now Assert that every PartitionedRelPruneInfo has a non-empty present_parts. That should allow us to catch any weird corner cases that have been missed. In passing, it seems there is no longer a good reason to have the AppendPath and MergeAppendPath's partitioned_rel fields a List of IntList. We can simply have a List of Relids instead. This is more compact in memory and faster to add new members to. We still know which is the root level partition as these always have a lower relid than their children. Previously this field was used for more things, but run-time partition pruning now remains the only user of it and it has no need for a List of IntLists. Here we also get rid of the RelOptInfo partitioned_child_rels field. This is what was previously used to (sometimes incorrectly) set the Append/MergeAppend path's partitioned_rels field. That was the only usage of that field, so we can happily just remove it. I also couldn't resist changing some nearby code to make use of the newly added for_each_from macro so we can skip the first element in the list without checking if the current item was the first one on each iteration. A bug report from Andreas Kretschmer prompted all this work, however, after some consideration, I'm not personally classing this as a bug fix. So no backpatch. In Andreas' test case, it just wasn't that clear that there was a nested Append since the top-level Append just had a single sub-path which was pulled up a level, per 8edd0e794. Author: David Rowley Reviewed-by: Amit Langote Discussion: https://postgr.es/m/flat/CAApHDvqSchs%2BubdybcfFaSPB%2B%2BEA7kqMaoqajtP0GtZvzOOR3g%40mail.gmail.com
5 years ago
-- Ensure run-time pruning works correctly for nested Append nodes
set parallel_setup_cost to 0;
set parallel_tuple_cost to 0;
create table listp (a int) partition by list(a);
create table listp_12 partition of listp for values in(1,2) partition by list(a);
create table listp_12_1 partition of listp_12 for values in(1);
create table listp_12_2 partition of listp_12 for values in(2);
-- Force the 2nd subnode of the Append to be non-parallel. This results in
-- a nested Append node because the mixed parallel / non-parallel paths cannot
-- be pulled into the top-level Append.
alter table listp_12_1 set (parallel_workers = 0);
-- Ensure that listp_12_2 is not scanned. (The nested Append is not seen in
-- the plan as it's pulled in setref.c due to having just a single subnode).
select explain_parallel_append('select * from listp where a = (select 1);');
Allow run-time pruning on nested Append/MergeAppend nodes Previously we only tagged on the required information to allow the executor to perform run-time partition pruning for Append/MergeAppend nodes belonging to base relations. It was thought that nested Append/MergeAppend nodes were just about always pulled up into the top-level Append/MergeAppend and that making the run-time pruning info for any sub Append/MergeAppend nodes was a waste of time. However, that was likely badly thought through. Some examples of cases we're unable to pullup nested Append/MergeAppends are: 1) Parallel Append nodes with a mix of parallel and non-parallel paths into a Parallel Append. 2) When planning an ordered Append scan a sub-partition which is unordered may require a nested MergeAppend path to ensure sub-partitions don't mix up the order of tuples being fed into the top-level Append. Unfortunately, it was not just as simple as removing the lines in createplan.c which were purposefully not building the run-time pruning info for anything but RELOPT_BASEREL relations. The code in add_paths_to_append_rel() was far too sloppy about which partitioned_rels it included for the Append/MergeAppend paths. The original code there would always assume accumulate_append_subpath() would pull each sub-Append and sub-MergeAppend path into the top-level path. While it does not appear that there were any actual bugs caused by having the additional partitioned table RT indexes recorded, what it did mean is that later in planning, when we built the run-time pruning info that we wasted effort and built PartitionedRelPruneInfos for partitioned tables that we had no subpaths for the executor to run-time prune. Here we tighten that up so that partitioned_rels only ever contains the RT index for partitioned tables which actually have subpaths in the given Append/MergeAppend. We can now Assert that every PartitionedRelPruneInfo has a non-empty present_parts. That should allow us to catch any weird corner cases that have been missed. In passing, it seems there is no longer a good reason to have the AppendPath and MergeAppendPath's partitioned_rel fields a List of IntList. We can simply have a List of Relids instead. This is more compact in memory and faster to add new members to. We still know which is the root level partition as these always have a lower relid than their children. Previously this field was used for more things, but run-time partition pruning now remains the only user of it and it has no need for a List of IntLists. Here we also get rid of the RelOptInfo partitioned_child_rels field. This is what was previously used to (sometimes incorrectly) set the Append/MergeAppend path's partitioned_rels field. That was the only usage of that field, so we can happily just remove it. I also couldn't resist changing some nearby code to make use of the newly added for_each_from macro so we can skip the first element in the list without checking if the current item was the first one on each iteration. A bug report from Andreas Kretschmer prompted all this work, however, after some consideration, I'm not personally classing this as a bug fix. So no backpatch. In Andreas' test case, it just wasn't that clear that there was a nested Append since the top-level Append just had a single sub-path which was pulled up a level, per 8edd0e794. Author: David Rowley Reviewed-by: Amit Langote Discussion: https://postgr.es/m/flat/CAApHDvqSchs%2BubdybcfFaSPB%2B%2BEA7kqMaoqajtP0GtZvzOOR3g%40mail.gmail.com
5 years ago
-- Like the above but throw some more complexity at the planner by adding
-- a UNION ALL. We expect both sides of the union not to scan the
-- non-required partitions.
select explain_parallel_append(
'select * from listp where a = (select 1)
Allow run-time pruning on nested Append/MergeAppend nodes Previously we only tagged on the required information to allow the executor to perform run-time partition pruning for Append/MergeAppend nodes belonging to base relations. It was thought that nested Append/MergeAppend nodes were just about always pulled up into the top-level Append/MergeAppend and that making the run-time pruning info for any sub Append/MergeAppend nodes was a waste of time. However, that was likely badly thought through. Some examples of cases we're unable to pullup nested Append/MergeAppends are: 1) Parallel Append nodes with a mix of parallel and non-parallel paths into a Parallel Append. 2) When planning an ordered Append scan a sub-partition which is unordered may require a nested MergeAppend path to ensure sub-partitions don't mix up the order of tuples being fed into the top-level Append. Unfortunately, it was not just as simple as removing the lines in createplan.c which were purposefully not building the run-time pruning info for anything but RELOPT_BASEREL relations. The code in add_paths_to_append_rel() was far too sloppy about which partitioned_rels it included for the Append/MergeAppend paths. The original code there would always assume accumulate_append_subpath() would pull each sub-Append and sub-MergeAppend path into the top-level path. While it does not appear that there were any actual bugs caused by having the additional partitioned table RT indexes recorded, what it did mean is that later in planning, when we built the run-time pruning info that we wasted effort and built PartitionedRelPruneInfos for partitioned tables that we had no subpaths for the executor to run-time prune. Here we tighten that up so that partitioned_rels only ever contains the RT index for partitioned tables which actually have subpaths in the given Append/MergeAppend. We can now Assert that every PartitionedRelPruneInfo has a non-empty present_parts. That should allow us to catch any weird corner cases that have been missed. In passing, it seems there is no longer a good reason to have the AppendPath and MergeAppendPath's partitioned_rel fields a List of IntList. We can simply have a List of Relids instead. This is more compact in memory and faster to add new members to. We still know which is the root level partition as these always have a lower relid than their children. Previously this field was used for more things, but run-time partition pruning now remains the only user of it and it has no need for a List of IntLists. Here we also get rid of the RelOptInfo partitioned_child_rels field. This is what was previously used to (sometimes incorrectly) set the Append/MergeAppend path's partitioned_rels field. That was the only usage of that field, so we can happily just remove it. I also couldn't resist changing some nearby code to make use of the newly added for_each_from macro so we can skip the first element in the list without checking if the current item was the first one on each iteration. A bug report from Andreas Kretschmer prompted all this work, however, after some consideration, I'm not personally classing this as a bug fix. So no backpatch. In Andreas' test case, it just wasn't that clear that there was a nested Append since the top-level Append just had a single sub-path which was pulled up a level, per 8edd0e794. Author: David Rowley Reviewed-by: Amit Langote Discussion: https://postgr.es/m/flat/CAApHDvqSchs%2BubdybcfFaSPB%2B%2BEA7kqMaoqajtP0GtZvzOOR3g%40mail.gmail.com
5 years ago
union all
select * from listp where a = (select 2);');
Allow run-time pruning on nested Append/MergeAppend nodes Previously we only tagged on the required information to allow the executor to perform run-time partition pruning for Append/MergeAppend nodes belonging to base relations. It was thought that nested Append/MergeAppend nodes were just about always pulled up into the top-level Append/MergeAppend and that making the run-time pruning info for any sub Append/MergeAppend nodes was a waste of time. However, that was likely badly thought through. Some examples of cases we're unable to pullup nested Append/MergeAppends are: 1) Parallel Append nodes with a mix of parallel and non-parallel paths into a Parallel Append. 2) When planning an ordered Append scan a sub-partition which is unordered may require a nested MergeAppend path to ensure sub-partitions don't mix up the order of tuples being fed into the top-level Append. Unfortunately, it was not just as simple as removing the lines in createplan.c which were purposefully not building the run-time pruning info for anything but RELOPT_BASEREL relations. The code in add_paths_to_append_rel() was far too sloppy about which partitioned_rels it included for the Append/MergeAppend paths. The original code there would always assume accumulate_append_subpath() would pull each sub-Append and sub-MergeAppend path into the top-level path. While it does not appear that there were any actual bugs caused by having the additional partitioned table RT indexes recorded, what it did mean is that later in planning, when we built the run-time pruning info that we wasted effort and built PartitionedRelPruneInfos for partitioned tables that we had no subpaths for the executor to run-time prune. Here we tighten that up so that partitioned_rels only ever contains the RT index for partitioned tables which actually have subpaths in the given Append/MergeAppend. We can now Assert that every PartitionedRelPruneInfo has a non-empty present_parts. That should allow us to catch any weird corner cases that have been missed. In passing, it seems there is no longer a good reason to have the AppendPath and MergeAppendPath's partitioned_rel fields a List of IntList. We can simply have a List of Relids instead. This is more compact in memory and faster to add new members to. We still know which is the root level partition as these always have a lower relid than their children. Previously this field was used for more things, but run-time partition pruning now remains the only user of it and it has no need for a List of IntLists. Here we also get rid of the RelOptInfo partitioned_child_rels field. This is what was previously used to (sometimes incorrectly) set the Append/MergeAppend path's partitioned_rels field. That was the only usage of that field, so we can happily just remove it. I also couldn't resist changing some nearby code to make use of the newly added for_each_from macro so we can skip the first element in the list without checking if the current item was the first one on each iteration. A bug report from Andreas Kretschmer prompted all this work, however, after some consideration, I'm not personally classing this as a bug fix. So no backpatch. In Andreas' test case, it just wasn't that clear that there was a nested Append since the top-level Append just had a single sub-path which was pulled up a level, per 8edd0e794. Author: David Rowley Reviewed-by: Amit Langote Discussion: https://postgr.es/m/flat/CAApHDvqSchs%2BubdybcfFaSPB%2B%2BEA7kqMaoqajtP0GtZvzOOR3g%40mail.gmail.com
5 years ago
drop table listp;
reset parallel_tuple_cost;
reset parallel_setup_cost;
-- Test case for run-time pruning with a nested Merge Append
set enable_sort to 0;
create table rangep (a int, b int) partition by range (a);
create table rangep_0_to_100 partition of rangep for values from (0) to (100) partition by list (b);
-- We need 3 sub-partitions. 1 to validate pruning worked and another two
-- because a single remaining partition would be pulled up to the main Append.
create table rangep_0_to_100_1 partition of rangep_0_to_100 for values in(1);
create table rangep_0_to_100_2 partition of rangep_0_to_100 for values in(2);
create table rangep_0_to_100_3 partition of rangep_0_to_100 for values in(3);
create table rangep_100_to_200 partition of rangep for values from (100) to (200);
create index on rangep (a);
-- Ensure run-time pruning works on the nested Merge Append
explain (analyze on, costs off, timing off, summary off)
select * from rangep where b IN((select 1),(select 2)) order by a;
reset enable_sort;
drop table rangep;
Fix some issues with step generation in partition pruning. In the case of range partitioning, get_steps_using_prefix() assumes that the passed-in prefix list contains at least one clause for each of the partition keys earlier than one specified in the passed-in step_lastkeyno, but the caller (ie, gen_prune_steps_from_opexps()) didn't take it into account, which led to a server crash or incorrect results when the list contained no clauses for such partition keys, as reported in bug #16500 and #16501 from Kobayashi Hisanori. Update the caller to call that function only when the list created there contains at least one clause for each of the earlier partition keys in the case of range partitioning. While at it, fix some other issues: * The list to pass to get_steps_using_prefix() is allowed to contain multiple clauses for the same partition key, as described in the comment for that function, but that function actually assumed that the list contained just a single clause for each of middle partition keys, which led to an assertion failure when the list contained multiple clauses for such partition keys. Update that function to match the comment. * In the case of hash partitioning, partition keys are allowed to be NULL, in which case the list to pass to get_steps_using_prefix() contains no clauses for NULL partition keys, but that function treats that case as like the case of range partitioning, which led to the assertion failure. Update the assertion test to take into account NULL partition keys in the case of hash partitioning. * Fix a typo in a comment in get_steps_using_prefix_recurse(). * gen_partprune_steps() failed to detect self-contradiction from strict-qual clauses and an IS NULL clause for the same partition key in some cases, producing incorrect partition-pruning steps, which led to incorrect results of partition pruning, but didn't cause any user-visible problems fortunately, as the self-contradiction is detected later in the query planning. Update that function to detect the self-contradiction. Per bug #16500 and #16501 from Kobayashi Hisanori. Patch by me, initial diagnosis for the reported issue and review by Dmitry Dolgov. Back-patch to v11, where partition pruning was introduced. Discussion: https://postgr.es/m/16500-d1613f2a78e1e090%40postgresql.org Discussion: https://postgr.es/m/16501-5234a9a0394f6754%40postgresql.org
5 years ago
--
-- Check that gen_prune_steps_from_opexps() works well for various cases of
-- clauses for different partition keys
--
create table rp_prefix_test1 (a int, b varchar) partition by range(a, b);
create table rp_prefix_test1_p1 partition of rp_prefix_test1 for values from (1, 'a') to (1, 'b');
create table rp_prefix_test1_p2 partition of rp_prefix_test1 for values from (2, 'a') to (2, 'b');
-- Don't call get_steps_using_prefix() with the last partition key b plus
-- an empty prefix
explain (costs off) select * from rp_prefix_test1 where a <= 1 and b = 'a';
create table rp_prefix_test2 (a int, b int, c int) partition by range(a, b, c);
create table rp_prefix_test2_p1 partition of rp_prefix_test2 for values from (1, 1, 0) to (1, 1, 10);
create table rp_prefix_test2_p2 partition of rp_prefix_test2 for values from (2, 2, 0) to (2, 2, 10);
-- Don't call get_steps_using_prefix() with the last partition key c plus
-- an invalid prefix (ie, b = 1)
explain (costs off) select * from rp_prefix_test2 where a <= 1 and b = 1 and c >= 0;
create table rp_prefix_test3 (a int, b int, c int, d int) partition by range(a, b, c, d);
create table rp_prefix_test3_p1 partition of rp_prefix_test3 for values from (1, 1, 1, 0) to (1, 1, 1, 10);
create table rp_prefix_test3_p2 partition of rp_prefix_test3 for values from (2, 2, 2, 0) to (2, 2, 2, 10);
-- Test that get_steps_using_prefix() handles a prefix that contains multiple
-- clauses for the partition key b (ie, b >= 1 and b >= 2)
explain (costs off) select * from rp_prefix_test3 where a >= 1 and b >= 1 and b >= 2 and c >= 2 and d >= 0;
-- Test that get_steps_using_prefix() handles a prefix that contains multiple
-- clauses for the partition key b (ie, b >= 1 and b = 2) (This also tests
-- that the caller arranges clauses in that prefix in the required order)
explain (costs off) select * from rp_prefix_test3 where a >= 1 and b >= 1 and b = 2 and c = 2 and d >= 0;
Fix some issues with step generation in partition pruning. In the case of range partitioning, get_steps_using_prefix() assumes that the passed-in prefix list contains at least one clause for each of the partition keys earlier than one specified in the passed-in step_lastkeyno, but the caller (ie, gen_prune_steps_from_opexps()) didn't take it into account, which led to a server crash or incorrect results when the list contained no clauses for such partition keys, as reported in bug #16500 and #16501 from Kobayashi Hisanori. Update the caller to call that function only when the list created there contains at least one clause for each of the earlier partition keys in the case of range partitioning. While at it, fix some other issues: * The list to pass to get_steps_using_prefix() is allowed to contain multiple clauses for the same partition key, as described in the comment for that function, but that function actually assumed that the list contained just a single clause for each of middle partition keys, which led to an assertion failure when the list contained multiple clauses for such partition keys. Update that function to match the comment. * In the case of hash partitioning, partition keys are allowed to be NULL, in which case the list to pass to get_steps_using_prefix() contains no clauses for NULL partition keys, but that function treats that case as like the case of range partitioning, which led to the assertion failure. Update the assertion test to take into account NULL partition keys in the case of hash partitioning. * Fix a typo in a comment in get_steps_using_prefix_recurse(). * gen_partprune_steps() failed to detect self-contradiction from strict-qual clauses and an IS NULL clause for the same partition key in some cases, producing incorrect partition-pruning steps, which led to incorrect results of partition pruning, but didn't cause any user-visible problems fortunately, as the self-contradiction is detected later in the query planning. Update that function to detect the self-contradiction. Per bug #16500 and #16501 from Kobayashi Hisanori. Patch by me, initial diagnosis for the reported issue and review by Dmitry Dolgov. Back-patch to v11, where partition pruning was introduced. Discussion: https://postgr.es/m/16500-d1613f2a78e1e090%40postgresql.org Discussion: https://postgr.es/m/16501-5234a9a0394f6754%40postgresql.org
5 years ago
create table hp_prefix_test (a int, b int, c int, d int) partition by hash (a part_test_int4_ops, b part_test_int4_ops, c part_test_int4_ops, d part_test_int4_ops);
create table hp_prefix_test_p1 partition of hp_prefix_test for values with (modulus 2, remainder 0);
create table hp_prefix_test_p2 partition of hp_prefix_test for values with (modulus 2, remainder 1);
-- Test that get_steps_using_prefix() handles non-NULL step_nullkeys
explain (costs off) select * from hp_prefix_test where a = 1 and b is null and c = 1 and d = 1;
drop table rp_prefix_test1;
drop table rp_prefix_test2;
drop table rp_prefix_test3;
drop table hp_prefix_test;
--
-- Check that gen_partprune_steps() detects self-contradiction from clauses
-- regardless of the order of the clauses (Here we use a custom operator to
-- prevent the equivclass.c machinery from reordering the clauses)
--
create operator === (
leftarg = int4,
rightarg = int4,
procedure = int4eq,
commutator = ===,
hashes
);
create operator class part_test_int4_ops2
for type int4
using hash as
operator 1 ===,
function 2 part_hashint4_noop(int4, int8);
create table hp_contradict_test (a int, b int) partition by hash (a part_test_int4_ops2, b part_test_int4_ops2);
create table hp_contradict_test_p1 partition of hp_contradict_test for values with (modulus 2, remainder 0);
create table hp_contradict_test_p2 partition of hp_contradict_test for values with (modulus 2, remainder 1);
explain (costs off) select * from hp_contradict_test where a is null and a === 1 and b === 1;
explain (costs off) select * from hp_contradict_test where a === 1 and b === 1 and a is null;
drop table hp_contradict_test;
drop operator class part_test_int4_ops2 using hash;
drop operator ===(int4, int4);