mirror of https://github.com/postgres/postgres
The right mix of DDL and VACUUM could corrupt a catalog page header such that PageIsVerified() durably fails, requiring a restore from backup. This affects only catalogs that both have a syscache and have DDL code that uses syscache tuples to construct updates. One of the test permutations shows a variant not yet fixed. This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with TM_Deleted. I think core and PGXN are indifferent to that. Per bug #17821 from Alexander Lakhin. Back-patch to v13 (all supported versions). The test case is v17+, since it uses INJECTION_POINT. Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org pull/205/head
parent
1587f7b9fc
commit
f4af4515bb
@ -0,0 +1,87 @@ |
||||
Parsed test spec with 4 sessions |
||||
|
||||
starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
ERROR: tuple concurrently deleted |
||||
step wakegrant4: <... completed> |
||||
|
||||
starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step mkrels4: |
||||
SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED |
||||
|
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
ERROR: duplicate key value violates unique constraint "pg_class_oid_index" |
||||
step wakegrant4: <... completed> |
||||
|
||||
starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 |
||||
step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step mkrels4: |
||||
SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED |
||||
|
||||
step r3: ROLLBACK; |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; |
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
step wakegrant4: <... completed> |
||||
step inspect4: |
||||
SELECT relhastriggers, relhassubclass FROM pg_class |
||||
WHERE oid = 'vactest.orig50'::regclass; |
||||
|
||||
relhastriggers|relhassubclass |
||||
--------------+-------------- |
||||
f |f |
||||
(1 row) |
||||
|
||||
@ -0,0 +1,86 @@ |
||||
Parsed test spec with 4 sessions |
||||
|
||||
starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
step wakegrant4: <... completed> |
||||
|
||||
starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step mkrels4: |
||||
SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED |
||||
|
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
step wakegrant4: <... completed> |
||||
|
||||
starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 |
||||
step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; |
||||
step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); |
||||
step at2: |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
<waiting ...> |
||||
step mkrels4: |
||||
SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED |
||||
|
||||
step r3: ROLLBACK; |
||||
step waitprunable4: CALL vactest.wait_prunable(); |
||||
step vac4: VACUUM pg_class; |
||||
step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...> |
||||
step wakeinval4: |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
<waiting ...> |
||||
step at2: <... completed> |
||||
step wakeinval4: <... completed> |
||||
step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; |
||||
step wakegrant4: |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
<waiting ...> |
||||
step grant1: <... completed> |
||||
ERROR: tuple concurrently updated |
||||
step wakegrant4: <... completed> |
||||
step inspect4: |
||||
SELECT relhastriggers, relhassubclass FROM pg_class |
||||
WHERE oid = 'vactest.orig50'::regclass; |
||||
|
||||
relhastriggers|relhassubclass |
||||
--------------+-------------- |
||||
t |t |
||||
(1 row) |
||||
|
||||
@ -0,0 +1,71 @@ |
||||
/*--------------------------------------------------------------------------
|
||||
* |
||||
* regress_injection.c |
||||
* Functions supporting test-specific subject matter. |
||||
* |
||||
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* IDENTIFICATION |
||||
* src/test/modules/injection_points/regress_injection.c |
||||
* |
||||
* ------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "access/table.h" |
||||
#include "fmgr.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/procarray.h" |
||||
#include "utils/xid8.h" |
||||
|
||||
/*
|
||||
* removable_cutoff - for syscache-update-pruned.spec |
||||
* |
||||
* Wrapper around GetOldestNonRemovableTransactionId(). In general, this can |
||||
* move backward. runningcheck=false isolation tests can reasonably prevent |
||||
* that. For the causes of backward movement, see |
||||
* postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com |
||||
* and the header comment for ComputeXidHorizons(). One can assume this |
||||
* doesn't move backward if one arranges for concurrent activity not to reach |
||||
* AbortTransaction() and not to allocate an XID while connected to another |
||||
* database. Non-runningcheck tests can control most concurrent activity, |
||||
* except autovacuum and the isolationtester control connection. Neither |
||||
* allocates XIDs, and AbortTransaction() in those would justify test failure. |
||||
*/ |
||||
PG_FUNCTION_INFO_V1(removable_cutoff); |
||||
Datum |
||||
removable_cutoff(PG_FUNCTION_ARGS) |
||||
{ |
||||
Relation rel = NULL; |
||||
TransactionId xid; |
||||
FullTransactionId next_fxid_before, |
||||
next_fxid; |
||||
|
||||
/* could take other relkinds callee takes, but we've not yet needed it */ |
||||
if (!PG_ARGISNULL(0)) |
||||
rel = table_open(PG_GETARG_OID(0), AccessShareLock); |
||||
|
||||
/*
|
||||
* No lock or snapshot necessarily prevents oldestXid from advancing past |
||||
* "xid" while this function runs. That concerns us only in that we must |
||||
* not ascribe "xid" to the wrong epoch. (That may never arise in |
||||
* isolation testing, but let's set a good example.) As a crude solution, |
||||
* retry until nextXid doesn't change. |
||||
*/ |
||||
next_fxid = ReadNextFullTransactionId(); |
||||
do |
||||
{ |
||||
CHECK_FOR_INTERRUPTS(); |
||||
next_fxid_before = next_fxid; |
||||
xid = GetOldestNonRemovableTransactionId(rel); |
||||
next_fxid = ReadNextFullTransactionId(); |
||||
} while (!FullTransactionIdEquals(next_fxid, next_fxid_before)); |
||||
|
||||
if (rel) |
||||
table_close(rel, AccessShareLock); |
||||
|
||||
PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid, |
||||
xid)); |
||||
} |
||||
@ -0,0 +1,179 @@ |
||||
# Test race conditions involving: |
||||
# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin |
||||
# - s2: CREATE TRIGGER making $FROM_SYSCACHE a dead tuple |
||||
# - s4: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED |
||||
|
||||
# This is a derivative work of inplace.spec, which exercises the corresponding |
||||
# race condition for inplace updates. |
||||
|
||||
# Despite local injection points, this is incompatible with runningcheck. |
||||
# First, removable_cutoff() could move backward, per its header comment. |
||||
# Second, other activity could trigger sinval queue overflow, negating our |
||||
# efforts to delay inval. Third, this deadlock emerges: |
||||
# |
||||
# - step at2 waits at an injection point, with interrupts held |
||||
# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE |
||||
# - step waitprunable4 waits for the unrelated backend to release its xmin |
||||
|
||||
# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting |
||||
# that thwarts testing the race conditions this spec seeks. |
||||
|
||||
|
||||
# Need s2 to make a non-HOT update. Otherwise, "VACUUM pg_class" would leave |
||||
# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row |
||||
# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save |
||||
# on filesystem syscalls, use relkind=c for every other rel. |
||||
setup |
||||
{ |
||||
CREATE EXTENSION injection_points; |
||||
CREATE SCHEMA vactest; |
||||
-- Ensure a leader RELOID catcache entry. PARALLEL RESTRICTED since a |
||||
-- parallel worker running pg_relation_filenode() would lack that effect. |
||||
CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int |
||||
LANGUAGE sql PARALLEL RESTRICTED |
||||
AS 'SELECT 0 FROM pg_relation_filenode($1)'; |
||||
CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void |
||||
LANGUAGE plpgsql SET search_path = vactest AS $$ |
||||
DECLARE |
||||
tname text; |
||||
BEGIN |
||||
FOR i in $2 .. $3 LOOP |
||||
tname := $1 || i; |
||||
EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()'); |
||||
RAISE DEBUG '% at %', tname, ctid |
||||
FROM pg_class WHERE oid = tname::regclass; |
||||
END LOOP; |
||||
END |
||||
$$; |
||||
CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$ |
||||
DECLARE |
||||
barrier xid8; |
||||
cutoff xid8; |
||||
BEGIN |
||||
barrier := pg_current_xact_id(); |
||||
-- autovacuum worker RelationCacheInitializePhase3() or the |
||||
-- isolationtester control connection might hold a snapshot that |
||||
-- limits pruning. Sleep until that clears. |
||||
LOOP |
||||
ROLLBACK; -- release MyProc->xmin, which could be the oldest |
||||
cutoff := removable_cutoff('pg_class'); |
||||
EXIT WHEN cutoff >= barrier; |
||||
RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier; |
||||
PERFORM pg_sleep(.1); |
||||
END LOOP; |
||||
END |
||||
$$; |
||||
} |
||||
setup { CALL vactest.wait_prunable(); -- maximize next two VACUUMs } |
||||
setup { VACUUM FULL pg_class; -- reduce free space } |
||||
setup { VACUUM FREEZE pg_class; -- populate fsm etc. } |
||||
setup |
||||
{ |
||||
SELECT FROM vactest.mkrels('orig', 1, 49); |
||||
CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off); |
||||
CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off); |
||||
SELECT FROM vactest.mkrels('orig', 51, 100); |
||||
} |
||||
teardown |
||||
{ |
||||
DROP SCHEMA vactest CASCADE; |
||||
DROP EXTENSION injection_points; |
||||
} |
||||
|
||||
# Wait during GRANT. Disable debug_discard_caches, since we're here to |
||||
# exercise an outcome that happens under permissible cache staleness. |
||||
session s1 |
||||
setup { |
||||
SET debug_discard_caches = 0; |
||||
SELECT FROM injection_points_set_local(); |
||||
SELECT FROM injection_points_attach('heap_update-before-pin', 'wait'); |
||||
} |
||||
step cachefill1 { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); } |
||||
step grant1 { GRANT SELECT ON vactest.orig50 TO PUBLIC; } |
||||
|
||||
# Update of the tuple that grant1 will update. Wait before sending invals, so |
||||
# s1 will not get a cache miss. Choose the commands for making such updates |
||||
# from among those whose heavyweight locking does not conflict with GRANT's |
||||
# heavyweight locking. (GRANT will see our XID as committed, so observing |
||||
# that XID in the tuple xmax also won't block GRANT.) |
||||
session s2 |
||||
setup { |
||||
SELECT FROM injection_points_set_local(); |
||||
SELECT FROM |
||||
injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait'); |
||||
} |
||||
step at2 { |
||||
CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 |
||||
FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); |
||||
} |
||||
|
||||
# Hold snapshot to block pruning. |
||||
session s3 |
||||
step snap3 { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; } |
||||
step r3 { ROLLBACK; } |
||||
|
||||
# Non-blocking actions. |
||||
session s4 |
||||
step waitprunable4 { CALL vactest.wait_prunable(); } |
||||
step vac4 { VACUUM pg_class; } |
||||
# Reuse the lp that s1 is waiting to change. I've observed reuse at the 1st |
||||
# or 18th CREATE, so create excess. |
||||
step mkrels4 { |
||||
SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED |
||||
} |
||||
step wakegrant4 { |
||||
SELECT FROM injection_points_detach('heap_update-before-pin'); |
||||
SELECT FROM injection_points_wakeup('heap_update-before-pin'); |
||||
} |
||||
step at4 { ALTER TABLE vactest.child50 INHERIT vactest.orig50; } |
||||
step wakeinval4 { |
||||
SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); |
||||
SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); |
||||
} |
||||
# Witness effects of steps at2 and/or at4. |
||||
step inspect4 { |
||||
SELECT relhastriggers, relhassubclass FROM pg_class |
||||
WHERE oid = 'vactest.orig50'::regclass; |
||||
} |
||||
|
||||
# TID from syscache becomes LP_UNUSED. Before the bug fix, this permutation |
||||
# made s1 fail with "attempted to update invisible tuple" or an assert. |
||||
# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff) |
||||
# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted. |
||||
# Persistent page header corruption ensued. For example, s1 overwrote |
||||
# pd_lower, pd_upper, and pd_special as though they were t_ctid. |
||||
permutation |
||||
cachefill1 # reads pg_class tuple T0, xmax invalid |
||||
at2 # T0 dead, T1 live |
||||
waitprunable4 # T0 prunable |
||||
vac4 # T0 becomes LP_UNUSED |
||||
grant1 # pauses at heap_update(T0) |
||||
wakeinval4(at2) # at2 sends inval message |
||||
wakegrant4(grant1) # s1 wakes: "tuple concurrently deleted" |
||||
|
||||
# add mkrels4: LP_UNUSED becomes a different rel's row |
||||
permutation |
||||
cachefill1 # reads pg_class tuple T0, xmax invalid |
||||
at2 # T0 dead, T1 live |
||||
waitprunable4 # T0 prunable |
||||
vac4 # T0 becomes LP_UNUSED |
||||
grant1 # pauses at heap_update(T0) |
||||
wakeinval4(at2) # at2 sends inval message |
||||
mkrels4 # T0 becomes a new rel |
||||
wakegrant4(grant1) # s1 wakes: "duplicate key value violates unique" |
||||
|
||||
# TID from syscache becomes LP_UNUSED, then becomes a newer version of the |
||||
# original rel's row. |
||||
permutation |
||||
snap3 # sets MyProc->xmin |
||||
cachefill1 # reads pg_class tuple T0, xmax invalid |
||||
at2 # T0 dead, T1 live |
||||
mkrels4 # T1's page becomes full |
||||
r3 # clears MyProc->xmin |
||||
waitprunable4 # T0 prunable |
||||
vac4 # T0 becomes LP_UNUSED |
||||
grant1 # pauses at heap_update(T0) |
||||
wakeinval4(at2) # at2 sends inval message |
||||
at4 # T1 dead, T0 live |
||||
wakegrant4(grant1) # s1 wakes: T0 dead, T2 live |
||||
inspect4 # observe loss of at2+at4 changes XXX is an extant bug |
||||
Loading…
Reference in new issue