mirror of https://github.com/postgres/postgres
For correctness of summarization results, it is critical that the snapshot used during the summarization scan is able to see all tuples that are live to all transactions -- including tuples inserted or deleted by in-progress transactions. Otherwise, it would be possible for a transaction to insert a tuple, then idle for a long time while a concurrent transaction executes summarization of the range: this would result in the inserted value not being considered in the summary. Previously we were trying to use an MVCC snapshot in conjunction with adding a "placeholder" tuple in the index: the snapshot would see all committed tuples, and the placeholder tuple would catch insertions by any new inserters. The hole is that prior insertions by transactions that were still in progress at the time the MVCC snapshot was taken were ignored. Kevin Grittner reported this as a bogus error message during vacuum with default transaction isolation mode set to repeatable read (because the error report mentioned a function name not being invoked during vacuum), but the problem is larger than that. To fix, tweak IndexBuildHeapRangeScan to have a new mode that behaves the way we need using SnapshotAny visibility rules. This change simplifies the BRIN code a bit, mainly by removing large comments that were mistaken. Instead, rely on the SnapshotAny semantics to provide what it needs. (The business about a placeholder tuple needs to remain: that covers the case where a transaction inserts a tuple in a page that summarization already scanned.) Discussion: https://www.postgresql.org/message-id/20150731175700.GX2441@postgresql.org In passing, remove a couple of unused declarations from brin.h and reword a comment to be proper English. This part submitted by Kevin Grittner. Backpatch to 9.5, where BRIN was introduced. pull/7/head
parent
6af9ee4c8c
commit
2834855cb9
@ -0,0 +1,39 @@ |
||||
Parsed test spec with 2 sessions |
||||
|
||||
starting permutation: s2check s1b s2b s1i s2summ s1c s2c s2check |
||||
step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); |
||||
itemoffset blknum attnum allnulls hasnulls placeholder value |
||||
|
||||
1 0 1 f f f {1 .. 1} |
||||
step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; |
||||
step s2b: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT 1; |
||||
?column? |
||||
|
||||
1 |
||||
step s1i: INSERT INTO brin_iso VALUES (1000); |
||||
step s2summ: SELECT brin_summarize_new_values('brinidx'::regclass); |
||||
brin_summarize_new_values |
||||
|
||||
1 |
||||
step s1c: COMMIT; |
||||
step s2c: COMMIT; |
||||
step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); |
||||
itemoffset blknum attnum allnulls hasnulls placeholder value |
||||
|
||||
1 0 1 f f f {1 .. 1} |
||||
2 1 1 f f f {1 .. 1000} |
||||
|
||||
starting permutation: s2check s1b s1i s2vacuum s1c s2check |
||||
step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); |
||||
itemoffset blknum attnum allnulls hasnulls placeholder value |
||||
|
||||
1 0 1 f f f {1 .. 1} |
||||
step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; |
||||
step s1i: INSERT INTO brin_iso VALUES (1000); |
||||
step s2vacuum: VACUUM brin_iso; |
||||
step s1c: COMMIT; |
||||
step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); |
||||
itemoffset blknum attnum allnulls hasnulls placeholder value |
||||
|
||||
1 0 1 f f f {1 .. 1} |
||||
2 1 1 f f f {1 .. 1000} |
@ -0,0 +1,44 @@ |
||||
# This test verifies that values inserted in transactions still in progress |
||||
# are considered during concurrent range summarization (either using the |
||||
# brin_summarize_new_values function or regular VACUUM). |
||||
|
||||
setup |
||||
{ |
||||
CREATE TABLE brin_iso ( |
||||
value int |
||||
) WITH (fillfactor=10); |
||||
CREATE INDEX brinidx ON brin_iso USING brin (value) WITH (pages_per_range=1); |
||||
-- this fills the first page: keep inserting value 1 until the returned ctid exceeds (1, 0), so the final row lands past the first page |
||||
DO $$ |
||||
DECLARE curtid tid; |
||||
BEGIN |
||||
LOOP |
||||
INSERT INTO brin_iso VALUES (1) RETURNING ctid INTO curtid; |
||||
EXIT WHEN curtid > tid '(1, 0)'; |
||||
END LOOP; |
||||
END; |
||||
$$; |
||||
CREATE EXTENSION IF NOT EXISTS pageinspect; |
||||
} |
||||
|
||||
teardown |
||||
{ |
||||
DROP TABLE brin_iso; |
||||
} |
||||
|
||||
session "s1" |
||||
step "s1b" { BEGIN ISOLATION LEVEL REPEATABLE READ; } |
||||
step "s1i" { INSERT INTO brin_iso VALUES (1000); } |
||||
step "s1c" { COMMIT; } |
||||
|
||||
session "s2" |
||||
step "s2b" { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT 1; } |
||||
step "s2summ" { SELECT brin_summarize_new_values('brinidx'::regclass); } |
||||
step "s2c" { COMMIT; } |
||||
|
||||
step "s2vacuum" { VACUUM brin_iso; } |
||||
|
||||
step "s2check" { SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); } |
||||
|
||||
permutation "s2check" "s1b" "s2b" "s1i" "s2summ" "s1c" "s2c" "s2check" |
||||
permutation "s2check" "s1b" "s1i" "s2vacuum" "s1c" "s2check" |
Loading…
Reference in new issue