mirror of https://github.com/postgres/postgres
Snapshot caching, introduced in 623a9ba79b, did not increment
xactCompletionCount during subtransaction abort. That could lead to an older
snapshot being reused. That is, at least as far as I can see, not a
correctness issue (for MVCC snapshots there's no difference between "in
progress" and "aborted"). The only difference between the old and new
snapshots would be a newer ->xmax.
While HeapTupleSatisfiesMVCC makes the same visibility determination, reusing
the old snapshot leads HeapTupleSatisfiesMVCC to not set
HEAP_XMIN_INVALID. Which subsequently causes the kill_prior_tuple optimization
to not kick in (via HeapTupleIsSurelyDead() returning false). The performance
effects of doing the same index-lookups over and over again is how the issue
was discovered...
Fix the issue by incrementing xactCompletionCount in
XidCacheRemoveRunningXids. It already acquires ProcArrayLock exclusively,
making that an easy proposition.
Add a test to ensure that kill_prior_tuple prevents index growth when it
involves aborted subtransaction of the current transaction.
Author: Andres Freund
Discussion: https://postgr.es/m/20210406043521.lopeo7bbigad3n6t@alap3.anarazel.de
Discussion: https://postgr.es/m/20210317055718.v6qs3ltzrformqoa%40alap3.anarazel.de
pull/64/head
parent
8523492d4e
commit
90c885cdab
@@ -0,0 +1,42 @@
--
-- Verify that index scans encountering dead rows produced by an
-- aborted subtransaction of the current transaction can utilize the
-- kill_prio_tuple optimization
--
-- NB: The table size is currently *not* expected to stay the same, we
-- don't have logic to trigger opportunistic pruning in cases like
-- this.
BEGIN;
SET LOCAL enable_seqscan = false;
SET LOCAL enable_indexonlyscan = false;
SET LOCAL enable_bitmapscan = false;
-- Can't easily use a unique index, since dead tuples can be found
-- independent of the kill_prior_tuples optimization.
CREATE TABLE clean_aborted_self(key int, data text);
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
-- save index size from before the changes, for comparison
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
DO $$
BEGIN
    -- iterate often enough to see index growth even on larger-than-default page sizes
    FOR i IN 1..100 LOOP
        BEGIN
            -- perform index scan over all the inserted keys to get them to be seen as dead
            IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
                RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
            END IF;
            INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
            -- just some error that's not normally thrown
            RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
        EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
    END LOOP;
END;$$;
-- show sizes only if they differ
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
 size_before | size_after 
-------------+------------
(0 rows)

ROLLBACK;
@@ -0,0 +1,44 @@
--
-- Verify that index scans encountering dead rows produced by an
-- aborted subtransaction of the current transaction can utilize the
-- kill_prio_tuple optimization
--
-- NB: The table size is currently *not* expected to stay the same, we
-- don't have logic to trigger opportunistic pruning in cases like
-- this.
BEGIN;

SET LOCAL enable_seqscan = false;
SET LOCAL enable_indexonlyscan = false;
SET LOCAL enable_bitmapscan = false;

-- Can't easily use a unique index, since dead tuples can be found
-- independent of the kill_prior_tuples optimization.
CREATE TABLE clean_aborted_self(key int, data text);
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');

-- save index size from before the changes, for comparison
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset

DO $$
BEGIN
    -- iterate often enough to see index growth even on larger-than-default page sizes
    FOR i IN 1..100 LOOP
        BEGIN
            -- perform index scan over all the inserted keys to get them to be seen as dead
            IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
                RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
            END IF;
            INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
            -- just some error that's not normally thrown
            RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
        EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
    END LOOP;
END;$$;

-- show sizes only if they differ
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');

ROLLBACK;
Loading…
Reference in new issue