Hash partitioning on an enum is problematic because the hash codes are derived from the OIDs assigned to the enum values, which will almost certainly be different after a dump-and-reload than they were before. This means that some rows will probably end up in different partitions than before, causing restore to fail because of partition constraint violations. (pg_upgrade dodges this problem by using hacks to force the enum values to keep the same OIDs, but that's not possible nor desirable for pg_dump.)

Users can work around that by specifying --load-via-partition-root, but since that's a dump-time not restore-time decision, one might find out the need for it far too late. Instead, teach pg_dump to apply that option automatically when dealing with a partitioned table that has hash-on-enum partitioning.

Also deal with a pre-existing issue for --load-via-partition-root mode: in a parallel restore, we try to TRUNCATE target tables just before loading them, in order to enable some backend optimizations. This is bad when using --load-via-partition-root because (a) we're likely to suffer deadlocks from restore jobs trying to restore rows into other partitions than they came from, and (b) if we miss getting a deadlock we might still lose data due to a TRUNCATE removing rows from some already-completed restore job.

The fix for this is conceptually simple: just don't TRUNCATE if we're dealing with a --load-via-partition-root case. The tricky bit is for pg_restore to identify those cases. In dumps using COPY commands we can inspect each COPY command to see if it targets the nominal target table or some ancestor. However, in dumps using INSERT commands it's pretty impractical to examine the INSERTs in advance. To provide a solution for that going forward, modify pg_dump to mark TABLE DATA items that are using --load-via-partition-root with a comment. (This change also responds to a complaint from Robert Haas that the dump output for --load-via-partition-root is pretty confusing.) pg_restore checks for the special comment as well as checking the COPY command if present.

This will fail to identify the combination of --load-via-partition-root and --inserts in pre-existing dump files, but that should be a pretty rare case in the field. If it does happen you will probably get a deadlock failure that you can work around by not using parallel restore, which is the same as before this bug fix.

Having done this, there seems no remaining reason for the alarmism in the pg_dump man page about combining --load-via-partition-root with parallel restore, so remove that warning.

Patch by me; thanks to Julien Rouhaud for review. Back-patch to v11 where hash partitioning was introduced.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
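To make the hazard concrete, here is a minimal SQL sketch (the type and table names are invented for illustration; they are not taken from the patch). Routing a row into a hash partition hashes the enum value's OID, and a dump-and-reload assigns fresh OIDs:

-- minimal sketch of the hazard; names are illustrative only
create type mood as enum ('sad', 'ok', 'happy');
create table t (m mood) partition by hash (m);
create table t_p0 partition of t for values with (modulus 2, remainder 0);
create table t_p1 partition of t for values with (modulus 2, remainder 1);
insert into t values ('happy');  -- routed by hashing the OID of 'happy'
-- after dump-and-reload, 'happy' has a fresh OID and may hash to the other
-- partition; restoring the row directly into its old partition
-- (COPY t_p0 FROM stdin) then violates t_p0's partition constraint,
-- while routing through the root (COPY t FROM stdin) still works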
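And a hedged sketch of what the archive looks like under this scheme, using the tht table from the test below; the marker comment's exact wording is an assumption based on the commit message, not quoted from pg_dump's output:

-- Data for Name: tht_p1; Type: TABLE DATA; Schema: public
-- load via partition root tht          (marker comment; wording approximate)
COPY public.tht (en, data) FROM stdin;  -- targets ancestor tht, not tht_p1
-- ... data rows ...
\.

Seeing either signal, the COPY target or the marker comment, pg_restore skips the pre-load TRUNCATE in parallel runs, avoiding the deadlock and data-loss hazards described above.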
parent de4d456b40
commit bc8cd50fef
@@ -0,0 +1,81 @@
# Copyright (c) 2021-2023, PostgreSQL Global Development Group

use strict;
use warnings;

use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

my $dbname1 = 'regression_src';
my $dbname2 = 'regression_dest1';
my $dbname3 = 'regression_dest2';

my $node = PostgreSQL::Test::Cluster->new('main');
$node->init;
$node->start;

my $backupdir = $node->backup_dir;

$node->run_log([ 'createdb', $dbname1 ]);
$node->run_log([ 'createdb', $dbname2 ]);
$node->run_log([ 'createdb', $dbname3 ]);

$node->safe_psql(
	$dbname1,
	qq{
create type digit as enum ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');

-- plain table with index
create table tplain (en digit, data int unique);
insert into tplain select (x%10)::text::digit, x from generate_series(1,1000) x;

-- non-troublesome hashed partitioning
create table ths (mod int, data int, unique(mod, data)) partition by hash(mod);
create table ths_p1 partition of ths for values with (modulus 3, remainder 0);
create table ths_p2 partition of ths for values with (modulus 3, remainder 1);
create table ths_p3 partition of ths for values with (modulus 3, remainder 2);
insert into ths select (x%10), x from generate_series(1,1000) x;

-- dangerous hashed partitioning
create table tht (en digit, data int, unique(en, data)) partition by hash(en);
create table tht_p1 partition of tht for values with (modulus 3, remainder 0);
create table tht_p2 partition of tht for values with (modulus 3, remainder 1);
create table tht_p3 partition of tht for values with (modulus 3, remainder 2);
insert into tht select (x%10)::text::digit, x from generate_series(1,1000) x;
});

$node->command_ok(
	[
		'pg_dump', '-Fd', '--no-sync', '-j2', '-f', "$backupdir/dump1",
		$node->connstr($dbname1)
	],
	'parallel dump');

$node->command_ok(
	[
		'pg_restore', '-v',
		'-d', $node->connstr($dbname2),
		'-j3', "$backupdir/dump1"
	],
	'parallel restore');

$node->command_ok(
	[
		'pg_dump', '-Fd',
		'--no-sync', '-j2',
		'-f', "$backupdir/dump2",
		'--inserts', $node->connstr($dbname1)
	],
	'parallel dump as inserts');

$node->command_ok(
	[
		'pg_restore', '-v',
		'-d', $node->connstr($dbname3),
		'-j3', "$backupdir/dump2"
	],
	'parallel restore as inserts');

done_testing();