Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- Creating an index on a partitioned table makes the partitions
|
|
|
|
|
-- automatically get the index
|
|
|
|
|
create table idxpart (a int, b int, c text) partition by range (a);
|
|
|
|
|
|
|
|
|
|
-- relhassubclass of a partitioned index is false before creating any partition.
|
|
|
|
|
-- It will be set after the first partition is created.
|
|
|
|
|
create index idxpart_idx on idxpart (a);
|
|
|
|
|
select relhassubclass from pg_class where relname = 'idxpart_idx';
|
|
|
|
|
|
|
|
|
|
-- Check that partitioned indexes are present in pg_indexes.
|
|
|
|
|
select indexdef from pg_indexes where indexname like 'idxpart_idx%';
|
|
|
|
|
drop index idxpart_idx;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (10);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (10) to (100)
|
|
|
|
|
partition by range (b);
|
|
|
|
|
create table idxpart21 partition of idxpart2 for values from (0) to (100);
|
|
|
|
|
|
|
|
|
|
-- Even with partitions, relhassubclass should not be set if a partitioned
|
|
|
|
|
-- index is created only on the parent.
|
|
|
|
|
create index idxpart_idx on only idxpart(a);
|
|
|
|
|
select relhassubclass from pg_class where relname = 'idxpart_idx';
|
|
|
|
|
drop index idxpart_idx;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
create index on idxpart (a);
|
|
|
|
|
select relname, relkind, relhassubclass, inhparent::regclass
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Some unsupported features
|
|
|
|
|
create table idxpart (a int, b int, c text) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (10);
|
|
|
|
|
create index concurrently on idxpart (a);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify bugfix with query on indexed partitioned table with no partitions
|
|
|
|
|
-- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
|
|
|
|
|
CREATE TABLE idxpart (col1 INT) PARTITION BY RANGE (col1);
|
|
|
|
|
CREATE INDEX ON idxpart (col1);
|
|
|
|
|
CREATE TABLE idxpart_two (col2 INT);
|
|
|
|
|
SELECT col2 FROM idxpart_two fk LEFT OUTER JOIN idxpart pk ON (col1 = col2);
|
|
|
|
|
DROP table idxpart, idxpart_two;
|
|
|
|
|
|
|
|
|
|
-- Verify bugfix with index rewrite on ALTER TABLE / SET DATA TYPE
|
|
|
|
|
-- https://postgr.es/m/CAKcux6mxNCGsgATwf5CGMF8g4WSupCXicCVMeKUTuWbyxHOMsQ@mail.gmail.com
|
|
|
|
|
CREATE TABLE idxpart (a INT, b TEXT, c INT) PARTITION BY RANGE(a);
|
|
|
|
|
CREATE TABLE idxpart1 PARTITION OF idxpart FOR VALUES FROM (MINVALUE) TO (MAXVALUE);
|
|
|
|
|
CREATE INDEX partidx_abc_idx ON idxpart (a, b, c);
|
|
|
|
|
INSERT INTO idxpart (a, b, c) SELECT i, i, i FROM generate_series(1, 50) i;
|
|
|
|
|
ALTER TABLE idxpart ALTER COLUMN c TYPE numeric;
|
|
|
|
|
DROP TABLE idxpart;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- If a table without index is attached as partition to a table with
|
|
|
|
|
-- an index, the index is automatically created
|
|
|
|
|
create table idxpart (a int, b int, c text) partition by range (a);
|
|
|
|
|
create index idxparti on idxpart (a);
|
|
|
|
|
create index idxparti2 on idxpart (b, c);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (10);
|
|
|
|
|
\d idxpart1
|
Fix assorted bugs in pg_get_partition_constraintdef().
It failed if passed a nonexistent relation OID, or one that was a non-heap
relation, because of blindly applying heap_open to a user-supplied OID.
This is not OK behavior for a SQL-exposed function; we have a project
policy that we should return NULL in such cases. Moreover, since
pg_get_partition_constraintdef ought now to work on indexes, restricting
it to heaps is flat wrong anyway.
The underlying function generate_partition_qual() wasn't on board with
indexes having partition quals either, nor for that matter with rels
having relispartition set but yet null relpartbound. (One wonders
whether the person who wrote the function comment blocks claiming that
these functions allow a missing relpartbound had ever tested it.)
Fix by testing relispartition before opening the rel, and by using
relation_open not heap_open. (If any other relkinds ever grow the
ability to have relispartition set, the code will work with them
automatically.) Also, don't reject null relpartbound in
generate_partition_qual.
Back-patch to v11, and all but the null-relpartbound change to v10.
(It's not really necessary to change generate_partition_qual at all
in v10, but I thought s/heap_open/relation_open/ would be a good
idea anyway just to keep the code in sync with later branches.)
Per report from Justin Pryzby.
Discussion: https://postgr.es/m/20180927200020.GJ776@telsasoft.com
7 years ago
|
|
|
\d+ idxpart1_a_idx
|
|
|
|
|
\d+ idxpart1_b_c_idx
|
|
|
|
|
|
|
|
|
|
-- Forbid ALTER TABLE when attaching or detaching an index to a partition.
|
|
|
|
|
create index idxpart_c on only idxpart (c);
|
|
|
|
|
create index idxpart1_c on idxpart1 (c);
|
|
|
|
|
alter table idxpart_c attach partition idxpart1_c for values from (10) to (20);
|
|
|
|
|
alter index idxpart_c attach partition idxpart1_c;
|
|
|
|
|
select relname, relpartbound from pg_class
|
|
|
|
|
where relname in ('idxpart_c', 'idxpart1_c')
|
|
|
|
|
order by relname;
|
|
|
|
|
alter table idxpart_c detach partition idxpart1_c;
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- If a partition already has an index, don't create a duplicative one
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a, b);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10);
|
|
|
|
|
create index on idxpart1 (a, b);
|
|
|
|
|
create index on idxpart (a, b);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select relname, relkind, relhassubclass, inhparent::regclass
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- DROP behavior for partitioned indexes
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (10);
|
|
|
|
|
drop index idxpart1_a_idx; -- no way
|
|
|
|
|
drop index concurrently idxpart_a_idx; -- unsupported
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
drop index idxpart_a_idx; -- both indexes go away
|
|
|
|
|
select relname, relkind from pg_class
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
drop table idxpart1; -- the index on partition goes away too
|
|
|
|
|
select relname, relkind from pg_class
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- DROP behavior with temporary partitioned indexes
|
|
|
|
|
create temp table idxpart_temp (a int) partition by range (a);
|
|
|
|
|
create index on idxpart_temp(a);
|
|
|
|
|
create temp table idxpart1_temp partition of idxpart_temp
|
|
|
|
|
for values from (0) to (10);
|
|
|
|
|
drop index idxpart1_temp_a_idx; -- error
|
|
|
|
|
-- non-concurrent drop is enforced here, so it is a valid case.
|
|
|
|
|
drop index concurrently idxpart_temp_a_idx;
|
|
|
|
|
select relname, relkind from pg_class
|
|
|
|
|
where relname like 'idxpart_temp%' order by relname;
|
|
|
|
|
drop table idxpart_temp;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- ALTER INDEX .. ATTACH, error cases
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a, b);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0, 0) to (10, 10);
|
|
|
|
|
create index idxpart_a_b_idx on only idxpart (a, b);
|
|
|
|
|
create index idxpart1_a_b_idx on idxpart1 (a, b);
|
|
|
|
|
create index idxpart1_tst1 on idxpart1 (b, a);
|
|
|
|
|
create index idxpart1_tst2 on idxpart1 using hash (a);
|
|
|
|
|
create index idxpart1_tst3 on idxpart1 (a, b) where a > 10;
|
|
|
|
|
|
|
|
|
|
alter index idxpart attach partition idxpart1;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart_a_b_idx;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_b_idx;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_tst1;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_tst2;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_tst3;
|
|
|
|
|
-- OK
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx;
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; -- quiet
|
|
|
|
|
|
|
|
|
|
-- reject dupe
|
|
|
|
|
create index idxpart1_2_a_b on idxpart1 (a, b);
|
|
|
|
|
alter index idxpart_a_b_idx attach partition idxpart1_2_a_b;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- make sure everything's gone
|
|
|
|
|
select indexrelid::regclass, indrelid::regclass
|
|
|
|
|
from pg_index where indexrelid::regclass::text like 'idxpart%';
|
|
|
|
|
|
|
|
|
|
-- Don't auto-attach incompatible indexes
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (a int, b int);
|
|
|
|
|
create index on idxpart1 using hash (a);
|
|
|
|
|
create index on idxpart1 (a) where b > 1;
|
|
|
|
|
create index on idxpart1 ((a + 0));
|
|
|
|
|
create index on idxpart1 (a, a);
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (1000);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- If CREATE INDEX ONLY, don't create indexes on partitions; and existing
|
|
|
|
|
-- indexes on partitions don't change parent. ALTER INDEX ATTACH can change
|
|
|
|
|
-- the parent after the fact.
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (100);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (100) to (1000)
|
|
|
|
|
partition by range (a);
|
|
|
|
|
create table idxpart21 partition of idxpart2 for values from (100) to (200);
|
|
|
|
|
create table idxpart22 partition of idxpart2 for values from (200) to (300);
|
|
|
|
|
create index on idxpart22 (a);
|
|
|
|
|
create index on only idxpart2 (a);
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
-- Here we expect that idxpart1 and idxpart2 have a new index, but idxpart21
|
|
|
|
|
-- does not; also, idxpart22 is not attached.
|
|
|
|
|
\d idxpart1
|
|
|
|
|
\d idxpart2
|
|
|
|
|
\d idxpart21
|
|
|
|
|
select indexrelid::regclass, indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
alter index idxpart2_a_idx attach partition idxpart22_a_idx;
|
|
|
|
|
select indexrelid::regclass, indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- attaching idxpart22 is not enough to set idxpart22_a_idx valid ...
|
|
|
|
|
alter index idxpart2_a_idx attach partition idxpart22_a_idx;
|
|
|
|
|
\d idxpart2
|
|
|
|
|
-- ... but this one is.
|
|
|
|
|
create index on idxpart21 (a);
|
|
|
|
|
alter index idxpart2_a_idx attach partition idxpart21_a_idx;
|
|
|
|
|
\d idxpart2
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- When a table is attached a partition and it already has an index, a
|
|
|
|
|
-- duplicate index should not get created, but rather the index becomes
|
|
|
|
|
-- attached to the parent's index.
|
|
|
|
|
create table idxpart (a int, b int, c text, d bool) partition by range (a);
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
create index idxparti on idxpart (a);
|
|
|
|
|
create index idxparti2 on idxpart (b, c);
|
|
|
|
|
create table idxpart1 (like idxpart including indexes);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select relname, relkind, inhparent::regclass
|
|
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (10);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select relname, relkind, inhparent::regclass
|
|
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
-- While here, also check matching when creating an index after the fact.
|
|
|
|
|
create index on idxpart1 ((a+b)) where d = true;
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select relname, relkind, inhparent::regclass
|
|
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
create index idxparti3 on idxpart ((a+b)) where d = true;
|
|
|
|
|
\d idxpart1
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
select relname, relkind, inhparent::regclass
|
|
|
|
|
from pg_class left join pg_index ix on (indexrelid = oid)
|
|
|
|
|
left join pg_inherits on (ix.indexrelid = inhrelid)
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify that attaching an invalid index does not mark the parent index valid.
|
|
|
|
|
-- On the other hand, attaching a valid index marks not only its direct
|
|
|
|
|
-- ancestor valid, but also any indirect ancestor that was only missing the one
|
|
|
|
|
-- that was just made valid
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (1) to (1000) partition by range (a);
|
|
|
|
|
create table idxpart11 partition of idxpart1 for values from (1) to (100);
|
|
|
|
|
create index on only idxpart1 (a);
|
|
|
|
|
create index on only idxpart (a);
|
|
|
|
|
-- this results in two invalid indexes:
|
|
|
|
|
select relname, indisvalid from pg_class join pg_index on indexrelid = oid
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
-- idxpart1_a_idx is not valid, so idxpart_a_idx should not become valid:
|
|
|
|
|
alter index idxpart_a_idx attach partition idxpart1_a_idx;
|
|
|
|
|
select relname, indisvalid from pg_class join pg_index on indexrelid = oid
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
-- after creating and attaching this, both idxpart1_a_idx and idxpart_a_idx
|
|
|
|
|
-- should become valid
|
|
|
|
|
create index on idxpart11 (a);
|
|
|
|
|
alter index idxpart1_a_idx attach partition idxpart11_a_idx;
|
|
|
|
|
select relname, indisvalid from pg_class join pg_index on indexrelid = oid
|
|
|
|
|
where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- verify dependency handling during ALTER TABLE DETACH PARTITION
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
create index on idxpart1 (a);
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
create table idxpart2 (like idxpart);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0000) to (1000);
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (1000) to (2000);
|
|
|
|
|
create table idxpart3 partition of idxpart for values from (2000) to (3000);
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
-- a) after detaching partitions, the indexes can be dropped independently
|
|
|
|
|
alter table idxpart detach partition idxpart1;
|
|
|
|
|
alter table idxpart detach partition idxpart2;
|
|
|
|
|
alter table idxpart detach partition idxpart3;
|
|
|
|
|
drop index idxpart1_a_idx;
|
|
|
|
|
drop index idxpart2_a_idx;
|
|
|
|
|
drop index idxpart3_a_idx;
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart, idxpart1, idxpart2, idxpart3;
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
create index on idxpart1 (a);
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
create table idxpart2 (like idxpart);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0000) to (1000);
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (1000) to (2000);
|
|
|
|
|
create table idxpart3 partition of idxpart for values from (2000) to (3000);
|
|
|
|
|
-- b) after detaching, dropping the index on parent does not remove the others
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
alter table idxpart detach partition idxpart1;
|
|
|
|
|
alter table idxpart detach partition idxpart2;
|
|
|
|
|
alter table idxpart detach partition idxpart3;
|
|
|
|
|
drop index idxpart_a_idx;
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart, idxpart1, idxpart2, idxpart3;
|
|
|
|
|
select relname, relkind from pg_class where relname like 'idxpart%' order by relname;
|
|
|
|
|
|
Redesign the partition dependency mechanism.
The original setup for dependencies of partitioned objects had
serious problems:
1. It did not verify that a drop cascading to a partition-child object
also cascaded to at least one of the object's partition parents. Now,
normally a child object would share all its dependencies with one or
another parent (e.g. a child index's opclass dependencies would be shared
with the parent index), so that this oversight is usually harmless.
But if some dependency failed to fit this pattern, the child could be
dropped while all its parents remain, creating a logically broken
situation. (It's easy to construct artificial cases that break it,
such as attaching an unrelated extension dependency to the child object
and then dropping the extension. I'm not sure if any less-artificial
cases exist.)
2. Management of partition dependencies during ATTACH/DETACH PARTITION
was complicated and buggy; for example, after detaching a partition
table it was possible to create cases where a formerly-child index
should be dropped and was not, because the correct set of dependencies
had not been reconstructed.
Less seriously, because multiple partition relationships were
represented identically in pg_depend, there was an order-of-traversal
dependency on which partition parent was cited in error messages.
We also had some pre-existing order-of-traversal hazards for error
messages related to internal and extension dependencies. This is
cosmetic to users but causes testing problems.
To fix #1, add a check at the end of the partition tree traversal
to ensure that at least one partition parent got deleted. To fix #2,
establish a new policy that partition dependencies are in addition to,
not instead of, a child object's usual dependencies; in this way
ATTACH/DETACH PARTITION need not cope with adding or removing the
usual dependencies.
To fix the cosmetic problem, distinguish between primary and secondary
partition dependency entries in pg_depend, by giving them different
deptypes. (They behave identically except for having different
priorities for being cited in error messages.) This means that the
former 'I' dependency type is replaced with new 'P' and 'S' types.
This also fixes a longstanding bug that after handling an internal
dependency by recursing to the owning object, findDependentObjects
did not verify that the current target was now scheduled for deletion,
and did not apply the current recursion level's objflags to it.
Perhaps that should be back-patched; but in the back branches it
would only matter if some concurrent transaction had removed the
internal-linkage pg_depend entry before the recursive call found it,
or the recursive call somehow failed to find it, both of which seem
unlikely.
Catversion bump because the contents of pg_depend change for
partitioning relationships.
Patch HEAD only. It's annoying that we're not fixing #2 in v11,
but there seems no practical way to do so given that the problem
is exactly a poor choice of what entries to put in pg_depend.
We can't really fix that while staying compatible with what's
in pg_depend in existing v11 installations.
Discussion: https://postgr.es/m/CAH2-Wzkypv1R+teZrr71U23J578NnTBt2X8+Y=Odr4pOdW1rXg@mail.gmail.com
7 years ago
|
|
|
create table idxpart (a int, b int, c int) partition by range(a);
|
|
|
|
|
create index on idxpart(c);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (250);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (250) to (500);
|
|
|
|
|
alter table idxpart detach partition idxpart2;
|
|
|
|
|
\d idxpart2
|
|
|
|
|
alter table idxpart2 drop column c;
|
|
|
|
|
\d idxpart2
|
|
|
|
|
drop table idxpart, idxpart2;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- Verify that expression indexes inherit correctly
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
create index on idxpart1 ((a + b));
|
|
|
|
|
create index on idxpart ((a + b));
|
|
|
|
|
create table idxpart2 (like idxpart);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0000) to (1000);
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (1000) to (2000);
|
|
|
|
|
create table idxpart3 partition of idxpart for values from (2000) to (3000);
|
|
|
|
|
select relname as child, inhparent::regclass as parent, pg_get_indexdef as childdef
|
|
|
|
|
from pg_class join pg_inherits on inhrelid = oid,
|
|
|
|
|
lateral pg_get_indexdef(pg_class.oid)
|
|
|
|
|
where relkind in ('i', 'I') and relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify behavior for collation (mis)matches
|
|
|
|
|
create table idxpart (a text) partition by range (a);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
create table idxpart2 (like idxpart);
|
|
|
|
|
create index on idxpart2 (a collate "POSIX");
|
|
|
|
|
create index on idxpart2 (a);
|
|
|
|
|
create index on idxpart2 (a collate "C");
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from ('aaa') to ('bbb');
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from ('bbb') to ('ccc');
|
|
|
|
|
create table idxpart3 partition of idxpart for values from ('ccc') to ('ddd');
|
|
|
|
|
create index on idxpart (a collate "C");
|
|
|
|
|
create table idxpart4 partition of idxpart for values from ('ddd') to ('eee');
|
|
|
|
|
select relname as child, inhparent::regclass as parent, pg_get_indexdef as childdef
|
|
|
|
|
from pg_class left join pg_inherits on inhrelid = oid,
|
|
|
|
|
lateral pg_get_indexdef(pg_class.oid)
|
|
|
|
|
where relkind in ('i', 'I') and relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify behavior for opclass (mis)matches
|
|
|
|
|
create table idxpart (a text) partition by range (a);
|
|
|
|
|
create table idxpart1 (like idxpart);
|
|
|
|
|
create table idxpart2 (like idxpart);
|
|
|
|
|
create index on idxpart2 (a);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from ('aaa') to ('bbb');
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from ('bbb') to ('ccc');
|
|
|
|
|
create table idxpart3 partition of idxpart for values from ('ccc') to ('ddd');
|
|
|
|
|
create index on idxpart (a text_pattern_ops);
|
|
|
|
|
create table idxpart4 partition of idxpart for values from ('ddd') to ('eee');
|
|
|
|
|
-- must *not* have attached the index we created on idxpart2
|
|
|
|
|
select relname as child, inhparent::regclass as parent, pg_get_indexdef as childdef
|
|
|
|
|
from pg_class left join pg_inherits on inhrelid = oid,
|
|
|
|
|
lateral pg_get_indexdef(pg_class.oid)
|
|
|
|
|
where relkind in ('i', 'I') and relname like 'idxpart%' order by relname;
|
|
|
|
|
drop index idxpart_a_idx;
|
|
|
|
|
create index on only idxpart (a text_pattern_ops);
|
|
|
|
|
-- must reject
|
|
|
|
|
alter index idxpart_a_idx attach partition idxpart2_a_idx;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify that attaching indexes maps attribute numbers correctly
|
|
|
|
|
create table idxpart (col1 int, a int, col2 int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (b int, col1 int, col2 int, col3 int, a int);
|
|
|
|
|
alter table idxpart drop column col1, drop column col2;
|
|
|
|
|
alter table idxpart1 drop column col1, drop column col2, drop column col3;
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (1000);
|
|
|
|
|
create index idxpart_1_idx on only idxpart (b, a);
|
|
|
|
|
create index idxpart1_1_idx on idxpart1 (b, a);
|
|
|
|
|
create index idxpart1_1b_idx on idxpart1 (b);
|
|
|
|
|
-- test expressions and partial-index predicate, too
|
|
|
|
|
create index idxpart_2_idx on only idxpart ((b + a)) where a > 1;
|
|
|
|
|
create index idxpart1_2_idx on idxpart1 ((b + a)) where a > 1;
|
|
|
|
|
create index idxpart1_2b_idx on idxpart1 ((a + b)) where a > 1;
|
|
|
|
|
create index idxpart1_2c_idx on idxpart1 ((b + a)) where b > 1;
|
|
|
|
|
alter index idxpart_1_idx attach partition idxpart1_1b_idx; -- fail
|
|
|
|
|
alter index idxpart_1_idx attach partition idxpart1_1_idx;
|
|
|
|
|
alter index idxpart_2_idx attach partition idxpart1_2b_idx; -- fail
|
|
|
|
|
alter index idxpart_2_idx attach partition idxpart1_2c_idx; -- fail
|
|
|
|
|
alter index idxpart_2_idx attach partition idxpart1_2_idx; -- ok
|
|
|
|
|
select relname as child, inhparent::regclass as parent, pg_get_indexdef as childdef
|
|
|
|
|
from pg_class left join pg_inherits on inhrelid = oid,
|
|
|
|
|
lateral pg_get_indexdef(pg_class.oid)
|
|
|
|
|
where relkind in ('i', 'I') and relname like 'idxpart%' order by relname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Make sure the partition columns are mapped correctly
|
|
|
|
|
create table idxpart (a int, b int, c text) partition by range (a);
|
|
|
|
|
create index idxparti on idxpart (a);
|
|
|
|
|
create index idxparti2 on idxpart (c, b);
|
|
|
|
|
create table idxpart1 (c text, a int, b int);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (10);
|
|
|
|
|
create table idxpart2 (c text, a int, b int);
|
|
|
|
|
create index on idxpart2 (a);
|
|
|
|
|
create index on idxpart2 (c, b);
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (10) to (20);
|
|
|
|
|
select c.relname, pg_get_indexdef(indexrelid)
|
|
|
|
|
from pg_class c join pg_index i on c.oid = i.indexrelid
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify that columns are mapped correctly in expression indexes
|
|
|
|
|
create table idxpart (col1 int, col2 int, a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (col2 int, b int, col1 int, a int);
|
|
|
|
|
create table idxpart2 (col1 int, col2 int, b int, a int);
|
|
|
|
|
alter table idxpart drop column col1, drop column col2;
|
|
|
|
|
alter table idxpart1 drop column col1, drop column col2;
|
|
|
|
|
alter table idxpart2 drop column col1, drop column col2;
|
|
|
|
|
create index on idxpart2 (abs(b));
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (0) to (1);
|
|
|
|
|
create index on idxpart (abs(b));
|
|
|
|
|
create index on idxpart ((b + 1));
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
alter table idxpart attach partition idxpart1 for values from (1) to (2);
|
|
|
|
|
select c.relname, pg_get_indexdef(indexrelid)
|
|
|
|
|
from pg_class c join pg_index i on c.oid = i.indexrelid
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify that columns are mapped correctly for WHERE in a partial index
|
|
|
|
|
create table idxpart (col1 int, a int, col3 int, b int) partition by range (a);
|
|
|
|
|
alter table idxpart drop column col1, drop column col3;
|
|
|
|
|
create table idxpart1 (col1 int, col2 int, col3 int, col4 int, b int, a int);
|
|
|
|
|
alter table idxpart1 drop column col1, drop column col2, drop column col3, drop column col4;
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (1000);
|
|
|
|
|
create table idxpart2 (col1 int, col2 int, b int, a int);
|
|
|
|
|
create index on idxpart2 (a) where b > 1000;
|
|
|
|
|
alter table idxpart2 drop column col1, drop column col2;
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (1000) to (2000);
|
|
|
|
|
create index on idxpart (a) where b > 1000;
|
|
|
|
|
select c.relname, pg_get_indexdef(indexrelid)
|
|
|
|
|
from pg_class c join pg_index i on c.oid = i.indexrelid
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Column number mapping: dropped columns in the partition
|
|
|
|
|
create table idxpart1 (drop_1 int, drop_2 int, col_keep int, drop_3 int);
|
|
|
|
|
alter table idxpart1 drop column drop_1;
|
|
|
|
|
alter table idxpart1 drop column drop_2;
|
|
|
|
|
alter table idxpart1 drop column drop_3;
|
|
|
|
|
create index on idxpart1 (col_keep);
|
|
|
|
|
create table idxpart (col_keep int) partition by range (col_keep);
|
|
|
|
|
create index on idxpart (col_keep);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (1000);
|
|
|
|
|
\d idxpart
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select attrelid::regclass, attname, attnum from pg_attribute
|
|
|
|
|
where attrelid::regclass::text like 'idxpart%' and attnum > 0
|
|
|
|
|
order by attrelid::regclass, attnum;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Column number mapping: dropped columns in the parent table
|
|
|
|
|
create table idxpart(drop_1 int, drop_2 int, col_keep int, drop_3 int) partition by range (col_keep);
|
|
|
|
|
alter table idxpart drop column drop_1;
|
|
|
|
|
alter table idxpart drop column drop_2;
|
|
|
|
|
alter table idxpart drop column drop_3;
|
|
|
|
|
create table idxpart1 (col_keep int);
|
|
|
|
|
create index on idxpart1 (col_keep);
|
|
|
|
|
create index on idxpart (col_keep);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (1000);
|
|
|
|
|
\d idxpart
|
|
|
|
|
\d idxpart1
|
|
|
|
|
select attrelid::regclass, attname, attnum from pg_attribute
|
|
|
|
|
where attrelid::regclass::text like 'idxpart%' and attnum > 0
|
|
|
|
|
order by attrelid::regclass, attnum;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
--
|
|
|
|
|
-- Constraint-related indexes
|
|
|
|
|
--
|
|
|
|
|
|
|
|
|
|
-- Verify that it works to add primary key / unique to partitioned tables
|
|
|
|
|
create table idxpart (a int primary key, b int) partition by range (a);
|
|
|
|
|
\d idxpart
|
|
|
|
|
-- multiple primary key on child should fail
|
|
|
|
|
create table failpart partition of idxpart (b primary key) for values from (0) to (100);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- primary key on child is okay if there's no PK in the parent, though
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart1pk partition of idxpart (a primary key) for values from (0) to (100);
|
|
|
|
|
\d idxpart1pk
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Failing to use the full partition key is not allowed
|
|
|
|
|
create table idxpart (a int unique, b int) partition by range (a, b);
|
|
|
|
|
create table idxpart (a int, b int unique) partition by range (a, b);
|
|
|
|
|
create table idxpart (a int primary key, b int) partition by range (b, a);
|
|
|
|
|
create table idxpart (a int, b int primary key) partition by range (b, a);
|
|
|
|
|
|
|
|
|
|
-- OK if you use them in some other order
|
|
|
|
|
create table idxpart (a int, b int, c text, primary key (a, b, c)) partition by range (b, c, a);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- OK to add an exclusion constraint if partitioning by its equal column
|
|
|
|
|
create table idxpart (a int4range, exclude USING GIST (a with = )) partition by range (a);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK more than one equal column
|
|
|
|
|
create table idxpart (a int4range, b int4range, exclude USING GIST (a with =, b with =)) partition by range (a, b);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK with more than one equal column: constraint is a proper superset of partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range, exclude USING GIST (a with =, b with =)) partition by range (a);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- Not OK more than one equal column: partition keys are a proper superset of constraint
|
|
|
|
|
create table idxpart (a int4range, b int4range, exclude USING GIST (a with = )) partition by range (a, b);
|
|
|
|
|
-- Not OK with just -|-
|
|
|
|
|
create table idxpart (a int4range, exclude USING GIST (a with -|- )) partition by range (a);
|
|
|
|
|
-- OK with equals and &&, and equals is the partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range, exclude USING GIST (a with =, b with &&)) partition by range (a);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- Not OK with equals and &&, and equals is not the partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range, c int4range, exclude USING GIST (b with =, c with &&)) partition by range (a);
|
|
|
|
|
-- OK more than one equal column and a && column
|
|
|
|
|
create table idxpart (a int4range, b int4range, c int4range, exclude USING GIST (a with =, b with =, c with &&)) partition by range (a, b);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- no expressions in partition key for PK/UNIQUE
|
|
|
|
|
create table idxpart (a int primary key, b int) partition by range ((b + a));
|
|
|
|
|
create table idxpart (a int unique, b int) partition by range ((b + a));
|
|
|
|
|
|
|
|
|
|
-- use ALTER TABLE to add a primary key
|
|
|
|
|
create table idxpart (a int, b int, c text) partition by range (a, b);
|
|
|
|
|
alter table idxpart add primary key (a); -- not an incomplete one though
|
|
|
|
|
alter table idxpart add primary key (a, b); -- this works
|
|
|
|
|
\d idxpart
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0, 0) to (1000, 1000);
|
|
|
|
|
\d idxpart1
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- use ALTER TABLE to add a unique constraint
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a, b);
|
|
|
|
|
alter table idxpart add unique (a); -- not an incomplete one though
|
|
|
|
|
alter table idxpart add unique (b, a); -- this works
|
|
|
|
|
\d idxpart
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Exclusion constraints can be added if partitioning by their equal column
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK more than one equal column
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a, b);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =, b with =);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK with more than one equal column: constraint is a proper superset of partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =, b with =);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- Not OK more than one equal column: partition keys are a proper superset of constraint
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a, b);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- Not OK with just -|-
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a, b);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with -|-);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK with equals and &&, and equals is the partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range) partition by range (a);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =, b with &&);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- Not OK with equals and &&, and equals is not the partition key
|
|
|
|
|
create table idxpart (a int4range, b int4range, c int4range) partition by range (a);
|
|
|
|
|
alter table idxpart add exclude USING GIST (b with =, c with &&);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
-- OK more than one equal column and a && column
|
|
|
|
|
create table idxpart (a int4range, b int4range, c int4range) partition by range (a, b);
|
|
|
|
|
alter table idxpart add exclude USING GIST (a with =, b with =, c with &&);
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- When (sub)partitions are created, they also contain the constraint
|
|
|
|
|
create table idxpart (a int, b int, primary key (a, b)) partition by range (a, b);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (1, 1) to (10, 10);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (10, 10) to (20, 20)
|
|
|
|
|
partition by range (b);
|
|
|
|
|
create table idxpart21 partition of idxpart2 for values from (10) to (15);
|
|
|
|
|
create table idxpart22 partition of idxpart2 for values from (15) to (20);
|
|
|
|
|
create table idxpart3 (b int not null, a int not null);
|
|
|
|
|
alter table idxpart attach partition idxpart3 for values from (20, 20) to (30, 30);
|
|
|
|
|
select conname, contype, conrelid::regclass, conindid::regclass, conkey
|
|
|
|
|
from pg_constraint where conrelid::regclass::text like 'idxpart%'
|
Catalog not-null constraints
We now create contype='n' pg_constraint rows for not-null constraints.
We propagate these constraints to other tables during operations such as
adding inheritance relationships, creating and attaching partitions and
creating tables LIKE other tables. We also spawn not-null constraints
for inheritance child tables when their parents have primary keys.
These related constraints mostly follow the well-known rules of
conislocal and coninhcount that we have for CHECK constraints, with some
adaptations: for example, as opposed to CHECK constraints, we don't
match not-null ones by name when descending a hierarchy to alter it,
instead matching by column name that they apply to. This means we don't
require the constraint names to be identical across a hierarchy.
For now, we omit them for system catalogs. Maybe this is worth
reconsidering. We don't support NOT VALID nor DEFERRABLE clauses
either; these can be added as separate features later (this patch is
already large and complicated enough.)
psql shows these constraints in \d+.
pg_dump requires some ad-hoc hacks, particularly when dumping a primary
key. We now create one "throwaway" not-null constraint for each column
in the PK together with the CREATE TABLE command, and once the PK is
created, all those throwaway constraints are removed. This avoids
having to check each tuple for nullness when the dump restores the
primary key creation.
pg_upgrading from an older release requires a somewhat brittle procedure
to create a constraint state that matches what would be created if the
database were being created fresh in Postgres 17. I have tested all the
scenarios I could think of, and it works correctly as far as I can tell,
but I could have neglected weird cases.
This patch has been very long in the making. The first patch was
written by Bernd Helmle in 2010 to add a new pg_constraint.contype value
('n'), which I (Álvaro) then hijacked in 2011 and 2012, until that one
was killed by the realization that we ought to use contype='c' instead:
manufactured CHECK constraints. However, later SQL standard
development, as well as nonobvious emergent properties of that design
(mostly, failure to distinguish them from "normal" CHECK constraints as
well as the performance implication of having to test the CHECK
expression) led us to reconsider this choice, so now the current
implementation uses contype='n' again. During Postgres 16 this had
already been introduced by commit e056c557aef4, but there were some
problems mainly with the pg_upgrade procedure that couldn't be fixed in
reasonable time, so it was reverted.
In 2016 Vitaly Burovoy also worked on this feature[1] but found no
consensus for his proposed approach, which was claimed to be closer to
the letter of the standard, requiring an additional pg_attribute column
to track the OID of the not-null constraint for that column.
[1] https://postgr.es/m/CAKOSWNkN6HSyatuys8xZxzRCR-KL1OkHS5-b9qd9bf1Rad3PLA@mail.gmail.com
Author: Álvaro Herrera <alvherre@alvh.no-ip.org>
Author: Bernd Helmle <mailings@oopsware.de>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
2 years ago
|
|
|
order by conrelid::regclass::text, conname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Verify that multi-layer partitioning honors the requirement that all
|
|
|
|
|
-- columns in the partition key must appear in primary/unique key
|
|
|
|
|
create table idxpart (a int, b int, primary key (a)) partition by range (a);
|
|
|
|
|
create table idxpart2 partition of idxpart
|
|
|
|
|
for values from (0) to (1000) partition by range (b); -- fail
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Ditto for the ATTACH PARTITION case
|
|
|
|
|
create table idxpart (a int unique, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (a int not null, b int, unique (a, b))
|
|
|
|
|
partition by range (a, b);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (1) to (1000);
|
|
|
|
|
DROP TABLE idxpart, idxpart1;
|
|
|
|
|
|
|
|
|
|
-- Multi-layer partitioning works correctly in this case:
|
|
|
|
|
create table idxpart (a int, b int, primary key (a, b)) partition by range (a);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (0) to (1000) partition by range (b);
|
|
|
|
|
create table idxpart21 partition of idxpart2 for values from (0) to (1000);
|
|
|
|
|
select conname, contype, conrelid::regclass, conindid::regclass, conkey
|
|
|
|
|
from pg_constraint where conrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by conname;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- If a partitioned table has a unique/PK constraint, then it's not possible
|
|
|
|
|
-- to drop the corresponding constraint in the children; nor it's possible
|
|
|
|
|
-- to drop the indexes individually. Dropping the constraint in the parent
|
|
|
|
|
-- gets rid of the lot.
|
|
|
|
|
create table idxpart (i int) partition by hash (i);
|
|
|
|
|
create table idxpart0 partition of idxpart (i) for values with (modulus 2, remainder 0);
|
|
|
|
|
create table idxpart1 partition of idxpart (i) for values with (modulus 2, remainder 1);
|
|
|
|
|
alter table idxpart0 add primary key(i);
|
|
|
|
|
alter table idxpart add primary key(i);
|
|
|
|
|
select indrelid::regclass, indexrelid::regclass, inhparent::regclass, indisvalid,
|
|
|
|
|
conname, conislocal, coninhcount, connoinherit, convalidated
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
left join pg_constraint con on (idx.indexrelid = con.conindid)
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop index idxpart0_pkey; -- fail
|
|
|
|
|
drop index idxpart1_pkey; -- fail
|
|
|
|
|
alter table idxpart0 drop constraint idxpart0_pkey; -- fail
|
|
|
|
|
alter table idxpart1 drop constraint idxpart1_pkey; -- fail
|
|
|
|
|
alter table idxpart drop constraint idxpart_pkey; -- ok
|
|
|
|
|
select indrelid::regclass, indexrelid::regclass, inhparent::regclass, indisvalid,
|
|
|
|
|
conname, conislocal, coninhcount, connoinherit, convalidated
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
left join pg_constraint con on (idx.indexrelid = con.conindid)
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- If the partition to be attached already has a primary key, fail if
|
|
|
|
|
-- it doesn't match the parent's PK.
|
|
|
|
|
CREATE TABLE idxpart (c1 INT PRIMARY KEY, c2 INT, c3 VARCHAR(10)) PARTITION BY RANGE(c1);
|
|
|
|
|
CREATE TABLE idxpart1 (LIKE idxpart);
|
|
|
|
|
ALTER TABLE idxpart1 ADD PRIMARY KEY (c1, c2);
|
|
|
|
|
ALTER TABLE idxpart ATTACH PARTITION idxpart1 FOR VALUES FROM (100) TO (200);
|
|
|
|
|
DROP TABLE idxpart, idxpart1;
|
|
|
|
|
|
|
|
|
|
-- Ditto if there is some distance between the PKs (subpartitioning)
|
|
|
|
|
create table idxpart (a int, b int, primary key (a)) partition by range (a);
|
|
|
|
|
create table idxpart1 (a int not null, b int) partition by range (a);
|
|
|
|
|
create table idxpart11 (a int not null, b int primary key);
|
|
|
|
|
alter table idxpart1 attach partition idxpart11 for values from (0) to (1000);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (0) to (10000);
|
|
|
|
|
drop table idxpart, idxpart1, idxpart11;
|
|
|
|
|
|
|
|
|
|
-- If a partitioned table has a constraint whose index is not valid,
|
|
|
|
|
-- attaching a missing partition makes it valid.
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart0 (like idxpart);
|
|
|
|
|
alter table idxpart0 add primary key (a);
|
|
|
|
|
alter table idxpart attach partition idxpart0 for values from (0) to (1000);
|
|
|
|
|
alter table only idxpart add primary key (a);
|
|
|
|
|
select indrelid::regclass, indexrelid::regclass, inhparent::regclass, indisvalid,
|
|
|
|
|
conname, conislocal, coninhcount, connoinherit, convalidated
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
left join pg_constraint con on (idx.indexrelid = con.conindid)
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
alter index idxpart_pkey attach partition idxpart0_pkey;
|
|
|
|
|
select indrelid::regclass, indexrelid::regclass, inhparent::regclass, indisvalid,
|
|
|
|
|
conname, conislocal, coninhcount, connoinherit, convalidated
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
left join pg_constraint con on (idx.indexrelid = con.conindid)
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
Avoid order-of-execution problems with ALTER TABLE ADD PRIMARY KEY.
Up to now, DefineIndex() was responsible for adding attnotnull constraints
to the columns of a primary key, in any case where it hadn't been
convenient for transformIndexConstraint() to mark those columns as
is_not_null. It (or rather its minion index_check_primary_key) did this
by executing an ALTER TABLE SET NOT NULL command for the target table.
The trouble with this solution is that if we're creating the index due
to ALTER TABLE ADD PRIMARY KEY, and the outer ALTER TABLE has additional
sub-commands, the inner ALTER TABLE's operations executed at the wrong
time with respect to the outer ALTER TABLE's operations. In particular,
the inner ALTER would perform a validation scan at a point where the
table's storage might be inconsistent with its catalog entries. (This is
on the hairy edge of being a security problem, but AFAICS it isn't one
because the inner scan would only be interested in the tuples' null
bitmaps.) This can result in unexpected failures, such as the one seen
in bug #15580 from Allison Kaptur.
To fix, let's remove the attempt to do SET NOT NULL from DefineIndex(),
reducing index_check_primary_key's role to verifying that the columns are
already not null. (It shouldn't ever see such a case, but it seems wise
to keep the check for safety.) Instead, make transformIndexConstraint()
generate ALTER TABLE SET NOT NULL subcommands to be executed ahead of
the ADD PRIMARY KEY operation in every case where it can't force the
column to be created already-not-null. This requires only minor surgery
in parse_utilcmd.c, and it makes for a much more satisfying spec for
transformIndexConstraint(): it's no longer having to take it on faith
that someone else will handle addition of NOT NULL constraints.
To make that work, we have to move the execution of AT_SetNotNull into
an ALTER pass that executes ahead of AT_PASS_ADD_INDEX. I moved it to
AT_PASS_COL_ATTRS, and put that after AT_PASS_ADD_COL to avoid failure
when the column is being added in the same command. This incidentally
fixes a bug in the only previous usage of AT_PASS_COL_ATTRS, for
AT_SetIdentity: it didn't work either for a newly-added column.
Playing around with this exposed a separate bug in ALTER TABLE ONLY ...
ADD PRIMARY KEY for partitioned tables. The intent of the ONLY modifier
in that context is to prevent doing anything that would require holding
lock for a long time --- but the implied SET NOT NULL would recurse to
the child partitions, and do an expensive validation scan for any child
where the column(s) were not already NOT NULL. To fix that, invent a
new ALTER subcommand AT_CheckNotNull that just insists that a child
column be already NOT NULL, and apply that, not AT_SetNotNull, when
recursing to children in this scenario. This results in a slightly laxer
definition of ALTER TABLE ONLY ... SET NOT NULL for partitioned tables,
too: that command will now work as long as all children are already NOT
NULL, whereas before it just threw up its hands if there were any
partitions.
In passing, clean up the API of generateClonedIndexStmt(): remove a
useless argument, ensure that the output argument is not left undefined,
update the header comment.
A small side effect of this change is that no-such-column errors in ALTER
TABLE ADD PRIMARY KEY now produce a different message that includes the
table name, because they are now detected by the SET NOT NULL step which
has historically worded its error that way. That seems fine to me, so
I didn't make any effort to avoid the wording change.
The basic bug #15580 is of very long standing, and these other bugs
aren't new in v12 either. However, this is a pretty significant change
in the way ALTER TABLE ADD PRIMARY KEY works. On balance it seems best
not to back-patch, at least not till we get some more confidence that
this patch has no new bugs.
Patch by me, but thanks to Jie Zhang for a preliminary version.
Discussion: https://postgr.es/m/15580-d1a6de5a3d65da51@postgresql.org
Discussion: https://postgr.es/m/1396E95157071C4EBBA51892C5368521017F2E6E63@G08CNEXMBPEKD02.g08.fujitsu.local
7 years ago
|
|
|
-- Related to the above scenario: ADD PRIMARY KEY on the parent mustn't
|
|
|
|
|
-- automatically propagate NOT NULL to child columns.
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart0 (like idxpart);
|
|
|
|
|
alter table idxpart0 add unique (a);
|
|
|
|
|
alter table idxpart attach partition idxpart0 default;
|
|
|
|
|
alter table only idxpart add primary key (a); -- fail, no not-null constraint
|
Avoid order-of-execution problems with ALTER TABLE ADD PRIMARY KEY.
Up to now, DefineIndex() was responsible for adding attnotnull constraints
to the columns of a primary key, in any case where it hadn't been
convenient for transformIndexConstraint() to mark those columns as
is_not_null. It (or rather its minion index_check_primary_key) did this
by executing an ALTER TABLE SET NOT NULL command for the target table.
The trouble with this solution is that if we're creating the index due
to ALTER TABLE ADD PRIMARY KEY, and the outer ALTER TABLE has additional
sub-commands, the inner ALTER TABLE's operations executed at the wrong
time with respect to the outer ALTER TABLE's operations. In particular,
the inner ALTER would perform a validation scan at a point where the
table's storage might be inconsistent with its catalog entries. (This is
on the hairy edge of being a security problem, but AFAICS it isn't one
because the inner scan would only be interested in the tuples' null
bitmaps.) This can result in unexpected failures, such as the one seen
in bug #15580 from Allison Kaptur.
To fix, let's remove the attempt to do SET NOT NULL from DefineIndex(),
reducing index_check_primary_key's role to verifying that the columns are
already not null. (It shouldn't ever see such a case, but it seems wise
to keep the check for safety.) Instead, make transformIndexConstraint()
generate ALTER TABLE SET NOT NULL subcommands to be executed ahead of
the ADD PRIMARY KEY operation in every case where it can't force the
column to be created already-not-null. This requires only minor surgery
in parse_utilcmd.c, and it makes for a much more satisfying spec for
transformIndexConstraint(): it's no longer having to take it on faith
that someone else will handle addition of NOT NULL constraints.
To make that work, we have to move the execution of AT_SetNotNull into
an ALTER pass that executes ahead of AT_PASS_ADD_INDEX. I moved it to
AT_PASS_COL_ATTRS, and put that after AT_PASS_ADD_COL to avoid failure
when the column is being added in the same command. This incidentally
fixes a bug in the only previous usage of AT_PASS_COL_ATTRS, for
AT_SetIdentity: it didn't work either for a newly-added column.
Playing around with this exposed a separate bug in ALTER TABLE ONLY ...
ADD PRIMARY KEY for partitioned tables. The intent of the ONLY modifier
in that context is to prevent doing anything that would require holding
lock for a long time --- but the implied SET NOT NULL would recurse to
the child partitions, and do an expensive validation scan for any child
where the column(s) were not already NOT NULL. To fix that, invent a
new ALTER subcommand AT_CheckNotNull that just insists that a child
column be already NOT NULL, and apply that, not AT_SetNotNull, when
recursing to children in this scenario. This results in a slightly laxer
definition of ALTER TABLE ONLY ... SET NOT NULL for partitioned tables,
too: that command will now work as long as all children are already NOT
NULL, whereas before it just threw up its hands if there were any
partitions.
In passing, clean up the API of generateClonedIndexStmt(): remove a
useless argument, ensure that the output argument is not left undefined,
update the header comment.
A small side effect of this change is that no-such-column errors in ALTER
TABLE ADD PRIMARY KEY now produce a different message that includes the
table name, because they are now detected by the SET NOT NULL step which
has historically worded its error that way. That seems fine to me, so
I didn't make any effort to avoid the wording change.
The basic bug #15580 is of very long standing, and these other bugs
aren't new in v12 either. However, this is a pretty significant change
in the way ALTER TABLE ADD PRIMARY KEY works. On balance it seems best
not to back-patch, at least not till we get some more confidence that
this patch has no new bugs.
Patch by me, but thanks to Jie Zhang for a preliminary version.
Discussion: https://postgr.es/m/15580-d1a6de5a3d65da51@postgresql.org
Discussion: https://postgr.es/m/1396E95157071C4EBBA51892C5368521017F2E6E63@G08CNEXMBPEKD02.g08.fujitsu.local
7 years ago
|
|
|
alter table idxpart0 alter column a set not null;
|
|
|
|
|
alter table only idxpart add primary key (a); -- now it works
|
Catalog not-null constraints
We now create contype='n' pg_constraint rows for not-null constraints.
We propagate these constraints to other tables during operations such as
adding inheritance relationships, creating and attaching partitions and
creating tables LIKE other tables. We also spawn not-null constraints
for inheritance child tables when their parents have primary keys.
These related constraints mostly follow the well-known rules of
conislocal and coninhcount that we have for CHECK constraints, with some
adaptations: for example, as opposed to CHECK constraints, we don't
match not-null ones by name when descending a hierarchy to alter it,
instead matching by column name that they apply to. This means we don't
require the constraint names to be identical across a hierarchy.
For now, we omit them for system catalogs. Maybe this is worth
reconsidering. We don't support NOT VALID nor DEFERRABLE clauses
either; these can be added as separate features later (this patch is
already large and complicated enough.)
psql shows these constraints in \d+.
pg_dump requires some ad-hoc hacks, particularly when dumping a primary
key. We now create one "throwaway" not-null constraint for each column
in the PK together with the CREATE TABLE command, and once the PK is
created, all those throwaway constraints are removed. This avoids
having to check each tuple for nullness when the dump restores the
primary key creation.
pg_upgrading from an older release requires a somewhat brittle procedure
to create a constraint state that matches what would be created if the
database were being created fresh in Postgres 17. I have tested all the
scenarios I could think of, and it works correctly as far as I can tell,
but I could have neglected weird cases.
This patch has been very long in the making. The first patch was
written by Bernd Helmle in 2010 to add a new pg_constraint.contype value
('n'), which I (Álvaro) then hijacked in 2011 and 2012, until that one
was killed by the realization that we ought to use contype='c' instead:
manufactured CHECK constraints. However, later SQL standard
development, as well as nonobvious emergent properties of that design
(mostly, failure to distinguish them from "normal" CHECK constraints as
well as the performance implication of having to test the CHECK
expression) led us to reconsider this choice, so now the current
implementation uses contype='n' again. During Postgres 16 this had
already been introduced by commit e056c557aef4, but there were some
problems mainly with the pg_upgrade procedure that couldn't be fixed in
reasonable time, so it was reverted.
In 2016 Vitaly Burovoy also worked on this feature[1] but found no
consensus for his proposed approach, which was claimed to be closer to
the letter of the standard, requiring an additional pg_attribute column
to track the OID of the not-null constraint for that column.
[1] https://postgr.es/m/CAKOSWNkN6HSyatuys8xZxzRCR-KL1OkHS5-b9qd9bf1Rad3PLA@mail.gmail.com
Author: Álvaro Herrera <alvherre@alvh.no-ip.org>
Author: Bernd Helmle <mailings@oopsware.de>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
2 years ago
|
|
|
alter index idxpart_pkey attach partition idxpart0_a_key;
|
Avoid order-of-execution problems with ALTER TABLE ADD PRIMARY KEY.
Up to now, DefineIndex() was responsible for adding attnotnull constraints
to the columns of a primary key, in any case where it hadn't been
convenient for transformIndexConstraint() to mark those columns as
is_not_null. It (or rather its minion index_check_primary_key) did this
by executing an ALTER TABLE SET NOT NULL command for the target table.
The trouble with this solution is that if we're creating the index due
to ALTER TABLE ADD PRIMARY KEY, and the outer ALTER TABLE has additional
sub-commands, the inner ALTER TABLE's operations executed at the wrong
time with respect to the outer ALTER TABLE's operations. In particular,
the inner ALTER would perform a validation scan at a point where the
table's storage might be inconsistent with its catalog entries. (This is
on the hairy edge of being a security problem, but AFAICS it isn't one
because the inner scan would only be interested in the tuples' null
bitmaps.) This can result in unexpected failures, such as the one seen
in bug #15580 from Allison Kaptur.
To fix, let's remove the attempt to do SET NOT NULL from DefineIndex(),
reducing index_check_primary_key's role to verifying that the columns are
already not null. (It shouldn't ever see such a case, but it seems wise
to keep the check for safety.) Instead, make transformIndexConstraint()
generate ALTER TABLE SET NOT NULL subcommands to be executed ahead of
the ADD PRIMARY KEY operation in every case where it can't force the
column to be created already-not-null. This requires only minor surgery
in parse_utilcmd.c, and it makes for a much more satisfying spec for
transformIndexConstraint(): it's no longer having to take it on faith
that someone else will handle addition of NOT NULL constraints.
To make that work, we have to move the execution of AT_SetNotNull into
an ALTER pass that executes ahead of AT_PASS_ADD_INDEX. I moved it to
AT_PASS_COL_ATTRS, and put that after AT_PASS_ADD_COL to avoid failure
when the column is being added in the same command. This incidentally
fixes a bug in the only previous usage of AT_PASS_COL_ATTRS, for
AT_SetIdentity: it didn't work either for a newly-added column.
Playing around with this exposed a separate bug in ALTER TABLE ONLY ...
ADD PRIMARY KEY for partitioned tables. The intent of the ONLY modifier
in that context is to prevent doing anything that would require holding
lock for a long time --- but the implied SET NOT NULL would recurse to
the child partitions, and do an expensive validation scan for any child
where the column(s) were not already NOT NULL. To fix that, invent a
new ALTER subcommand AT_CheckNotNull that just insists that a child
column be already NOT NULL, and apply that, not AT_SetNotNull, when
recursing to children in this scenario. This results in a slightly laxer
definition of ALTER TABLE ONLY ... SET NOT NULL for partitioned tables,
too: that command will now work as long as all children are already NOT
NULL, whereas before it just threw up its hands if there were any
partitions.
In passing, clean up the API of generateClonedIndexStmt(): remove a
useless argument, ensure that the output argument is not left undefined,
update the header comment.
A small side effect of this change is that no-such-column errors in ALTER
TABLE ADD PRIMARY KEY now produce a different message that includes the
table name, because they are now detected by the SET NOT NULL step which
has historically worded its error that way. That seems fine to me, so
I didn't make any effort to avoid the wording change.
The basic bug #15580 is of very long standing, and these other bugs
aren't new in v12 either. However, this is a pretty significant change
in the way ALTER TABLE ADD PRIMARY KEY works. On balance it seems best
not to back-patch, at least not till we get some more confidence that
this patch has no new bugs.
Patch by me, but thanks to Jie Zhang for a preliminary version.
Discussion: https://postgr.es/m/15580-d1a6de5a3d65da51@postgresql.org
Discussion: https://postgr.es/m/1396E95157071C4EBBA51892C5368521017F2E6E63@G08CNEXMBPEKD02.g08.fujitsu.local
7 years ago
|
|
|
alter table idxpart0 alter column a drop not null; -- fail, pkey needs it
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- if a partition has a unique index without a constraint, does not attach
|
|
|
|
|
-- automatically; creates a new index instead.
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (a int not null, b int);
|
|
|
|
|
create unique index on idxpart1 (a);
|
|
|
|
|
alter table idxpart add primary key (a);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (1) to (1000);
|
|
|
|
|
select indrelid::regclass, indexrelid::regclass, inhparent::regclass, indisvalid,
|
|
|
|
|
conname, conislocal, coninhcount, connoinherit, convalidated
|
|
|
|
|
from pg_index idx left join pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
left join pg_constraint con on (idx.indexrelid = con.conindid)
|
|
|
|
|
where indrelid::regclass::text like 'idxpart%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Can't attach an index without a corresponding constraint
|
|
|
|
|
create table idxpart (a int, b int) partition by range (a);
|
|
|
|
|
create table idxpart1 (a int not null, b int);
|
|
|
|
|
create unique index on idxpart1 (a);
|
|
|
|
|
alter table idxpart attach partition idxpart1 for values from (1) to (1000);
|
|
|
|
|
alter table only idxpart add primary key (a);
|
|
|
|
|
alter index idxpart_pkey attach partition idxpart1_a_idx; -- fail
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Test that unique constraints are working
|
|
|
|
|
create table idxpart (a int, b text, primary key (a, b)) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (100000);
|
|
|
|
|
create table idxpart2 (c int, like idxpart);
|
|
|
|
|
insert into idxpart2 (c, a, b) values (42, 572814, 'inserted first');
|
|
|
|
|
alter table idxpart2 drop column c;
|
|
|
|
|
create unique index on idxpart (a);
|
|
|
|
|
alter table idxpart attach partition idxpart2 for values from (100000) to (1000000);
|
|
|
|
|
insert into idxpart values (0, 'zero'), (42, 'life'), (2^16, 'sixteen');
|
|
|
|
|
insert into idxpart select 2^g, format('two to power of %s', g) from generate_series(15, 17) g;
|
|
|
|
|
insert into idxpart values (16, 'sixteen');
|
|
|
|
|
insert into idxpart (b, a) values ('one', 142857), ('two', 285714);
|
|
|
|
|
insert into idxpart select a * 2, b || b from idxpart where a between 2^16 and 2^19;
|
|
|
|
|
insert into idxpart values (572814, 'five');
|
|
|
|
|
insert into idxpart values (857142, 'six');
|
|
|
|
|
select tableoid::regclass, * from idxpart order by a;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
|
|
|
|
-- Test some other non-btree index types
|
|
|
|
|
create table idxpart (a int, b text, c int[]) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (100000);
|
|
|
|
|
set enable_seqscan to off;
|
|
|
|
|
|
|
|
|
|
create index idxpart_brin on idxpart using brin(b);
|
|
|
|
|
explain (costs off) select * from idxpart where b = 'abcd';
|
|
|
|
|
drop index idxpart_brin;
|
|
|
|
|
|
|
|
|
|
create index idxpart_spgist on idxpart using spgist(b);
|
|
|
|
|
explain (costs off) select * from idxpart where b = 'abcd';
|
|
|
|
|
drop index idxpart_spgist;
|
|
|
|
|
|
|
|
|
|
create index idxpart_gin on idxpart using gin(c);
|
|
|
|
|
explain (costs off) select * from idxpart where c @> array[42];
|
|
|
|
|
drop index idxpart_gin;
|
|
|
|
|
|
|
|
|
|
reset enable_seqscan;
|
|
|
|
|
drop table idxpart;
|
|
|
|
|
|
Local partitioned indexes
When CREATE INDEX is run on a partitioned table, create catalog entries
for an index on the partitioned table (which is just a placeholder since
the table proper has no data of its own), and recurse to create actual
indexes on the existing partitions; create them in future partitions
also.
As a convenience gadget, if the new index definition matches some
existing index in partitions, these are picked up and used instead of
creating new ones. Whichever way these indexes come about, they become
attached to the index on the parent table and are dropped alongside it,
and cannot be dropped on isolation unless they are detached first.
To support pg_dump'ing these indexes, add commands
CREATE INDEX ON ONLY <table>
(which creates the index on the parent partitioned table, without
recursing) and
ALTER INDEX ATTACH PARTITION
(which is used after the indexes have been created individually on each
partition, to attach them to the parent index). These reconstruct prior
database state exactly.
Reviewed-by: (in alphabetical order) Peter Eisentraut, Robert Haas, Amit
Langote, Jesper Pedersen, Simon Riggs, David Rowley
Discussion: https://postgr.es/m/20171113170646.gzweigyrgg6pwsg4@alvherre.pgsql
8 years ago
|
|
|
-- intentionally leave some objects around
|
|
|
|
|
create table idxpart (a int) partition by range (a);
|
|
|
|
|
create table idxpart1 partition of idxpart for values from (0) to (100);
|
|
|
|
|
create table idxpart2 partition of idxpart for values from (100) to (1000)
|
|
|
|
|
partition by range (a);
|
|
|
|
|
create table idxpart21 partition of idxpart2 for values from (100) to (200);
|
|
|
|
|
create table idxpart22 partition of idxpart2 for values from (200) to (300);
|
|
|
|
|
create index on idxpart22 (a);
|
|
|
|
|
create index on only idxpart2 (a);
|
|
|
|
|
alter index idxpart2_a_idx attach partition idxpart22_a_idx;
|
|
|
|
|
create index on idxpart (a);
|
|
|
|
|
create table idxpart_another (a int, b int, primary key (a, b)) partition by range (a);
|
|
|
|
|
create table idxpart_another_1 partition of idxpart_another for values from (0) to (100);
|
|
|
|
|
create table idxpart3 (c int, b int, a int) partition by range (a);
|
|
|
|
|
alter table idxpart3 drop column b, drop column c;
|
|
|
|
|
create table idxpart31 partition of idxpart3 for values from (1000) to (1200);
|
|
|
|
|
create table idxpart32 partition of idxpart3 for values from (1200) to (1400);
|
|
|
|
|
alter table idxpart attach partition idxpart3 for values from (1000) to (2000);
|
|
|
|
|
|
|
|
|
|
-- More objects intentionally left behind, to verify some pg_dump/pg_upgrade
|
|
|
|
|
-- behavior; see https://postgr.es/m/20190321204928.GA17535@alvherre.pgsql
|
|
|
|
|
create schema regress_indexing;
|
|
|
|
|
set search_path to regress_indexing;
|
|
|
|
|
create table pk (a int primary key) partition by range (a);
|
|
|
|
|
create table pk1 partition of pk for values from (0) to (1000);
|
|
|
|
|
create table pk2 (b int, a int);
|
|
|
|
|
alter table pk2 drop column b;
|
|
|
|
|
alter table pk2 alter a set not null;
|
|
|
|
|
alter table pk attach partition pk2 for values from (1000) to (2000);
|
|
|
|
|
create table pk3 partition of pk for values from (2000) to (3000);
|
|
|
|
|
create table pk4 (like pk);
|
|
|
|
|
alter table pk attach partition pk4 for values from (3000) to (4000);
|
|
|
|
|
create table pk5 (like pk) partition by range (a);
|
|
|
|
|
create table pk51 partition of pk5 for values from (4000) to (4500);
|
|
|
|
|
create table pk52 partition of pk5 for values from (4500) to (5000);
|
|
|
|
|
alter table pk attach partition pk5 for values from (4000) to (5000);
|
|
|
|
|
reset search_path;
|
|
|
|
|
|
|
|
|
|
-- Test that covering partitioned indexes work in various cases
|
|
|
|
|
create table covidxpart (a int, b int) partition by list (a);
|
|
|
|
|
create unique index on covidxpart (a) include (b);
|
|
|
|
|
create table covidxpart1 partition of covidxpart for values in (1);
|
|
|
|
|
create table covidxpart2 partition of covidxpart for values in (2);
|
|
|
|
|
insert into covidxpart values (1, 1);
|
|
|
|
|
insert into covidxpart values (1, 1);
|
|
|
|
|
create table covidxpart3 (b int, c int, a int);
|
|
|
|
|
alter table covidxpart3 drop c;
|
|
|
|
|
alter table covidxpart attach partition covidxpart3 for values in (3);
|
|
|
|
|
insert into covidxpart values (3, 1);
|
|
|
|
|
insert into covidxpart values (3, 1);
|
|
|
|
|
create table covidxpart4 (b int, a int);
|
|
|
|
|
create unique index on covidxpart4 (a) include (b);
|
|
|
|
|
create unique index on covidxpart4 (a);
|
|
|
|
|
alter table covidxpart attach partition covidxpart4 for values in (4);
|
|
|
|
|
insert into covidxpart values (4, 1);
|
|
|
|
|
insert into covidxpart values (4, 1);
|
|
|
|
|
create unique index on covidxpart (b) include (a); -- should fail
|
|
|
|
|
|
|
|
|
|
-- check that detaching a partition also detaches the primary key constraint
|
|
|
|
|
create table parted_pk_detach_test (a int primary key) partition by list (a);
|
|
|
|
|
create table parted_pk_detach_test1 partition of parted_pk_detach_test for values in (1);
|
|
|
|
|
alter table parted_pk_detach_test1 drop constraint parted_pk_detach_test1_pkey; -- should fail
|
|
|
|
|
alter table parted_pk_detach_test detach partition parted_pk_detach_test1;
|
|
|
|
|
alter table parted_pk_detach_test1 drop constraint parted_pk_detach_test1_pkey;
|
|
|
|
|
drop table parted_pk_detach_test, parted_pk_detach_test1;
|
|
|
|
|
create table parted_uniq_detach_test (a int unique) partition by list (a);
|
|
|
|
|
create table parted_uniq_detach_test1 partition of parted_uniq_detach_test for values in (1);
|
|
|
|
|
alter table parted_uniq_detach_test1 drop constraint parted_uniq_detach_test1_a_key; -- should fail
|
|
|
|
|
alter table parted_uniq_detach_test detach partition parted_uniq_detach_test1;
|
|
|
|
|
alter table parted_uniq_detach_test1 drop constraint parted_uniq_detach_test1_a_key;
|
|
|
|
|
drop table parted_uniq_detach_test, parted_uniq_detach_test1;
|
|
|
|
|
|
|
|
|
|
-- check that dropping a column takes with it any partitioned indexes
|
|
|
|
|
-- depending on it.
|
|
|
|
|
create table parted_index_col_drop(a int, b int, c int)
|
|
|
|
|
partition by list (a);
|
|
|
|
|
create table parted_index_col_drop1 partition of parted_index_col_drop
|
|
|
|
|
for values in (1) partition by list (a);
|
|
|
|
|
-- leave this partition without children.
|
|
|
|
|
create table parted_index_col_drop2 partition of parted_index_col_drop
|
|
|
|
|
for values in (2) partition by list (a);
|
|
|
|
|
create table parted_index_col_drop11 partition of parted_index_col_drop1
|
|
|
|
|
for values in (1);
|
|
|
|
|
create index on parted_index_col_drop (b);
|
|
|
|
|
create index on parted_index_col_drop (c);
|
|
|
|
|
create index on parted_index_col_drop (b, c);
|
|
|
|
|
alter table parted_index_col_drop drop column c;
|
|
|
|
|
\d parted_index_col_drop
|
|
|
|
|
\d parted_index_col_drop1
|
|
|
|
|
\d parted_index_col_drop2
|
|
|
|
|
\d parted_index_col_drop11
|
|
|
|
|
drop table parted_index_col_drop;
|
Ignore invalid indexes when enforcing index rules in ALTER TABLE ATTACH PARTITION
A portion of ALTER TABLE .. ATTACH PARTITION is to ensure that the
partition being attached to the partitioned table has a correct set of
indexes, so as there is a consistent index mapping between the
partitioned table and its new-to-be partition. However, as introduced
in 8b08f7d, the current logic could choose an invalid index as a match,
which is something that can exist when dealing with more than two levels
of partitioning, like attaching a partitioned table (that has
partitions, with an index created by CREATE INDEX ON ONLY) to another
partitioned table.
A partitioned index with indisvalid set to false is equivalent to an
incomplete partition tree, meaning that an invalid partitioned index
does not have indexes defined in all its partitions. Hence, choosing an
invalid partitioned index can create inconsistent partition index trees,
where the parent attaching to is valid, but its partition may be
invalid.
In the report from Alexander Lakhin, this showed up as an assertion
failure when validating an index. Without assertions enabled, the
partition index tree would be actually broken, as indisvalid should
be switched to true for a partitioned index once all its partitions are
themselves valid. With two levels of partitioning, the top partitioned
table used a valid index and was able to link to an invalid index stored
on its partition, itself a partitioned table.
I have studied a few options here (like the possibility to switch
indisvalid to false for the parent), but came down to the conclusion
that we'd better rely on a simple rule: invalid indexes had better never
be chosen, so as the partition attached uses and creates indexes that
the parent expects. Some regression tests are added to provide some
coverage. Note that the existing coverage is not impacted.
This is a problem since partitioned indexes exist, so backpatch all the
way down to v11.
Reported-by: Alexander Lakhin
Discussion: https://postgr.es/14987634-43c0-0cb3-e075-94d423607e08@gmail.com
Backpatch-through: 11
3 years ago
|
|
|
|
|
|
|
|
-- Check that invalid indexes are not selected when attaching a partition.
|
|
|
|
|
create table parted_inval_tab (a int) partition by range (a);
|
|
|
|
|
create index parted_inval_idx on parted_inval_tab (a);
|
|
|
|
|
create table parted_inval_tab_1 (a int) partition by range (a);
|
|
|
|
|
create table parted_inval_tab_1_1 partition of parted_inval_tab_1
|
|
|
|
|
for values from (0) to (10);
|
|
|
|
|
create table parted_inval_tab_1_2 partition of parted_inval_tab_1
|
|
|
|
|
for values from (10) to (20);
|
|
|
|
|
-- this creates an invalid index.
|
|
|
|
|
create index parted_inval_ixd_1 on only parted_inval_tab_1 (a);
|
|
|
|
|
-- this creates new indexes for all the partitions of parted_inval_tab_1,
|
|
|
|
|
-- discarding the invalid index created previously as what is chosen.
|
|
|
|
|
alter table parted_inval_tab attach partition parted_inval_tab_1
|
|
|
|
|
for values from (1) to (100);
|
|
|
|
|
select indexrelid::regclass, indisvalid,
|
|
|
|
|
indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join
|
|
|
|
|
pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'parted_inval%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table parted_inval_tab;
|
|
|
|
|
|
|
|
|
|
-- Check setup of indisvalid across a complex partition tree on index
|
|
|
|
|
-- creation. If one index in a partition index is invalid, so should its
|
|
|
|
|
-- partitioned index.
|
|
|
|
|
create table parted_isvalid_tab (a int, b int) partition by range (a);
|
|
|
|
|
create table parted_isvalid_tab_1 partition of parted_isvalid_tab
|
|
|
|
|
for values from (1) to (10) partition by range (a);
|
|
|
|
|
create table parted_isvalid_tab_2 partition of parted_isvalid_tab
|
|
|
|
|
for values from (10) to (20) partition by range (a);
|
|
|
|
|
create table parted_isvalid_tab_11 partition of parted_isvalid_tab_1
|
|
|
|
|
for values from (1) to (5);
|
|
|
|
|
create table parted_isvalid_tab_12 partition of parted_isvalid_tab_1
|
|
|
|
|
for values from (5) to (10);
|
|
|
|
|
-- create an invalid index on one of the partitions.
|
|
|
|
|
insert into parted_isvalid_tab_11 values (1, 0);
|
|
|
|
|
create index concurrently parted_isvalid_idx_11 on parted_isvalid_tab_11 ((a/b));
|
|
|
|
|
-- The previous invalid index is selected, invalidating all the indexes up to
|
|
|
|
|
-- the top-most parent.
|
|
|
|
|
create index parted_isvalid_idx on parted_isvalid_tab ((a/b));
|
|
|
|
|
select indexrelid::regclass, indisvalid,
|
|
|
|
|
indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join
|
|
|
|
|
pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'parted_isvalid%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table parted_isvalid_tab;
|
Fix updates of indisvalid for partitioned indexes
indisvalid is switched to true for partitioned indexes when all its
partitions have valid indexes when attaching a new partition, up to the
top-most parent if all its leaves are themselves valid when dealing with
multiple layers of partitions.
The copy of the tuple from pg_index used to switch indisvalid to true
came from the relation cache, which is incorrect. Particularly, in the
case reported by Shruthi Gowda, executing a series of commands in a
single transaction would cause the validation of partitioned indexes to
use an incorrect version of a pg_index tuple, as indexes are reloaded
after an invalidation request with RelationReloadIndexInfo(), a much
faster version than a full index cache rebuild. In this case, the
limited information updated in the cache leads to an incorrect version
of the tuple used. One of the symptoms reported was the following
error, with a replica identity update, for instance:
"ERROR: attempted to update invisible tuple"
This is incorrect since 8b08f7d, so backpatch all the way down.
Reported-by: Shruthi Gowda
Author: Michael Paquier
Reviewed-by: Shruthi Gowda, Dilip Kumar
Discussion: https://postgr.es/m/CAASxf_PBcxax0wW-3gErUyftZ0XrCs3Lrpuhq4-Z3Fak1DoW7Q@mail.gmail.com
Backpatch-through: 11
3 years ago
|
|
|
|
|
|
|
|
-- Check state of replica indexes when attaching a partition.
|
|
|
|
|
begin;
|
|
|
|
|
create table parted_replica_tab (id int not null) partition by range (id);
|
|
|
|
|
create table parted_replica_tab_1 partition of parted_replica_tab
|
|
|
|
|
for values from (1) to (10) partition by range (id);
|
|
|
|
|
create table parted_replica_tab_11 partition of parted_replica_tab_1
|
|
|
|
|
for values from (1) to (5);
|
|
|
|
|
create unique index parted_replica_idx
|
|
|
|
|
on only parted_replica_tab using btree (id);
|
|
|
|
|
create unique index parted_replica_idx_1
|
|
|
|
|
on only parted_replica_tab_1 using btree (id);
|
|
|
|
|
-- This triggers an update of pg_index.indisreplident for parted_replica_idx.
|
|
|
|
|
alter table only parted_replica_tab_1 replica identity
|
|
|
|
|
using index parted_replica_idx_1;
|
|
|
|
|
create unique index parted_replica_idx_11 on parted_replica_tab_11 USING btree (id);
|
|
|
|
|
select indexrelid::regclass, indisvalid, indisreplident,
|
|
|
|
|
indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join
|
|
|
|
|
pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'parted_replica%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
-- parted_replica_idx is not valid yet here, because parted_replica_idx_1
|
|
|
|
|
-- is not valid.
|
|
|
|
|
alter index parted_replica_idx ATTACH PARTITION parted_replica_idx_1;
|
|
|
|
|
select indexrelid::regclass, indisvalid, indisreplident,
|
|
|
|
|
indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join
|
|
|
|
|
pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'parted_replica%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
-- parted_replica_idx becomes valid here.
|
|
|
|
|
alter index parted_replica_idx_1 ATTACH PARTITION parted_replica_idx_11;
|
|
|
|
|
alter table only parted_replica_tab_1 replica identity
|
|
|
|
|
using index parted_replica_idx_1;
|
|
|
|
|
commit;
|
|
|
|
|
select indexrelid::regclass, indisvalid, indisreplident,
|
|
|
|
|
indrelid::regclass, inhparent::regclass
|
|
|
|
|
from pg_index idx left join
|
|
|
|
|
pg_inherits inh on (idx.indexrelid = inh.inhrelid)
|
|
|
|
|
where indexrelid::regclass::text like 'parted_replica%'
|
|
|
|
|
order by indexrelid::regclass::text collate "C";
|
|
|
|
|
drop table parted_replica_tab;
|