mirror of https://github.com/postgres/postgres
This reverts the following set of commits (branch: pull/57/head), following complaints about the lack of portability of the central part of the code in bufmgr.c as well as the use of partition mapping locks during page reads:
c780a7a9
f2b88396
b787d4ce
ce7f772c
60a51c6b
Per discussion with Andres Freund, Robert Haas and myself. Bump catalog version. Discussion: https://postgr.es/m/20201029181729.2nrub47u7yqncsv7@alap3.anarazel.de
parent
90851d1d26
commit
e152506ade
@ -1,229 +0,0 @@ |
||||
/*-------------------------------------------------------------------------
|
||||
* |
||||
* pagefuncs.c |
||||
* Functions for features related to relation pages. |
||||
* |
||||
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group |
||||
* Portions Copyright (c) 1994, Regents of the University of California |
||||
* |
||||
* |
||||
* IDENTIFICATION |
||||
* src/backend/utils/adt/pagefuncs.c |
||||
* |
||||
*------------------------------------------------------------------------- |
||||
*/ |
||||
|
||||
#include "postgres.h" |
||||
|
||||
#include "access/relation.h" |
||||
#include "funcapi.h" |
||||
#include "miscadmin.h" |
||||
#include "storage/bufmgr.h" |
||||
#include "storage/lmgr.h" |
||||
#include "storage/smgr.h" |
||||
#include "utils/builtins.h" |
||||
#include "utils/syscache.h" |
||||
|
||||
static void check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, |
||||
Oid relid, ForkNumber single_forknum); |
||||
static void check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, |
||||
Relation relation, ForkNumber forknum); |
||||
|
||||
/*
|
||||
* callback arguments for check_pages_error_callback() |
||||
*/ |
||||
typedef struct CheckPagesErrorInfo |
||||
{ |
||||
char *path; |
||||
BlockNumber blkno; |
||||
} CheckPagesErrorInfo; |
||||
|
||||
/*
|
||||
* Error callback specific to check_relation_fork(). |
||||
*/ |
||||
static void |
||||
check_pages_error_callback(void *arg) |
||||
{ |
||||
CheckPagesErrorInfo *errinfo = (CheckPagesErrorInfo *) arg; |
||||
|
||||
errcontext("while checking page %u of path %s", |
||||
errinfo->blkno, errinfo->path); |
||||
} |
||||
|
||||
/*
|
||||
* pg_relation_check_pages |
||||
* |
||||
* Check the state of all the pages for one or more fork types in the given |
||||
* relation. |
||||
*/ |
||||
Datum |
||||
pg_relation_check_pages(PG_FUNCTION_ARGS) |
||||
{ |
||||
Oid relid; |
||||
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; |
||||
TupleDesc tupdesc; |
||||
Tuplestorestate *tupstore; |
||||
MemoryContext per_query_ctx; |
||||
MemoryContext oldcontext; |
||||
ForkNumber forknum; |
||||
|
||||
/* Switch into long-lived context to construct returned data structures */ |
||||
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; |
||||
oldcontext = MemoryContextSwitchTo(per_query_ctx); |
||||
|
||||
/* Build a tuple descriptor for our result type */ |
||||
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) |
||||
elog(ERROR, "return type must be a row type"); |
||||
|
||||
tupstore = tuplestore_begin_heap(true, false, work_mem); |
||||
rsinfo->returnMode = SFRM_Materialize; |
||||
rsinfo->setResult = tupstore; |
||||
rsinfo->setDesc = tupdesc; |
||||
|
||||
MemoryContextSwitchTo(oldcontext); |
||||
|
||||
/* handle arguments */ |
||||
if (PG_ARGISNULL(0)) |
||||
{ |
||||
/* Just leave if nothing is defined */ |
||||
PG_RETURN_VOID(); |
||||
} |
||||
|
||||
/* By default all the forks of a relation are checked */ |
||||
if (PG_ARGISNULL(1)) |
||||
forknum = InvalidForkNumber; |
||||
else |
||||
{ |
||||
const char *forkname = TextDatumGetCString(PG_GETARG_TEXT_PP(1)); |
||||
|
||||
forknum = forkname_to_number(forkname); |
||||
} |
||||
|
||||
relid = PG_GETARG_OID(0); |
||||
|
||||
check_one_relation(tupdesc, tupstore, relid, forknum); |
||||
tuplestore_donestoring(tupstore); |
||||
|
||||
return (Datum) 0; |
||||
} |
||||
|
||||
/*
|
||||
* Perform the check on a single relation, possibly filtered with a single |
||||
* fork. This function will check if the given relation exists or not, as |
||||
* a relation could be dropped after checking for the list of relations and |
||||
* before getting here, and we don't want to error out in this case. |
||||
*/ |
||||
static void |
||||
check_one_relation(TupleDesc tupdesc, Tuplestorestate *tupstore, |
||||
Oid relid, ForkNumber single_forknum) |
||||
{ |
||||
Relation relation; |
||||
ForkNumber forknum; |
||||
|
||||
/* Check if relation exists. leaving if there is no such relation */ |
||||
if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid))) |
||||
return; |
||||
|
||||
relation = relation_open(relid, AccessShareLock); |
||||
|
||||
/*
|
||||
* Sanity checks, returning no results if not supported. Temporary |
||||
* relations and relations without storage are out of scope. |
||||
*/ |
||||
if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind) || |
||||
relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) |
||||
{ |
||||
relation_close(relation, AccessShareLock); |
||||
return; |
||||
} |
||||
|
||||
RelationOpenSmgr(relation); |
||||
|
||||
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) |
||||
{ |
||||
if (single_forknum != InvalidForkNumber && single_forknum != forknum) |
||||
continue; |
||||
|
||||
if (smgrexists(relation->rd_smgr, forknum)) |
||||
check_relation_fork(tupdesc, tupstore, relation, forknum); |
||||
} |
||||
|
||||
relation_close(relation, AccessShareLock); |
||||
} |
||||
|
||||
/*
|
||||
* For a given relation and fork, do the real work of iterating over all pages |
||||
* and doing the check. Caller must hold an AccessShareLock lock on the given |
||||
* relation. |
||||
*/ |
||||
static void |
||||
check_relation_fork(TupleDesc tupdesc, Tuplestorestate *tupstore, |
||||
Relation relation, ForkNumber forknum) |
||||
{ |
||||
BlockNumber blkno, |
||||
nblocks; |
||||
SMgrRelation smgr = relation->rd_smgr; |
||||
char *path; |
||||
CheckPagesErrorInfo errinfo; |
||||
ErrorContextCallback errcallback; |
||||
|
||||
/* Number of output arguments in the SRF */ |
||||
#define PG_CHECK_RELATION_COLS 2 |
||||
|
||||
Assert(CheckRelationLockedByMe(relation, AccessShareLock, true)); |
||||
|
||||
/*
|
||||
* We remember the number of blocks here. Since caller must hold a lock |
||||
* on the relation, we know that it won't be truncated while we are |
||||
* iterating over the blocks. Any block added after this function started |
||||
* will not be checked. |
||||
*/ |
||||
nblocks = RelationGetNumberOfBlocksInFork(relation, forknum); |
||||
|
||||
path = relpathbackend(smgr->smgr_rnode.node, |
||||
smgr->smgr_rnode.backend, |
||||
forknum); |
||||
|
||||
/*
|
||||
* Error context to print some information about blocks and relations |
||||
* impacted by corruptions. |
||||
*/ |
||||
errinfo.path = pstrdup(path); |
||||
errinfo.blkno = 0; |
||||
errcallback.callback = check_pages_error_callback; |
||||
errcallback.arg = (void *) &errinfo; |
||||
errcallback.previous = error_context_stack; |
||||
error_context_stack = &errcallback; |
||||
|
||||
for (blkno = 0; blkno < nblocks; blkno++) |
||||
{ |
||||
Datum values[PG_CHECK_RELATION_COLS]; |
||||
bool nulls[PG_CHECK_RELATION_COLS]; |
||||
int i = 0; |
||||
|
||||
/* Update block number for the error context */ |
||||
errinfo.blkno = blkno; |
||||
|
||||
CHECK_FOR_INTERRUPTS(); |
||||
|
||||
/* Check the given buffer */ |
||||
if (CheckBuffer(smgr, forknum, blkno)) |
||||
continue; |
||||
|
||||
memset(values, 0, sizeof(values)); |
||||
memset(nulls, 0, sizeof(nulls)); |
||||
|
||||
values[i++] = CStringGetTextDatum(path); |
||||
values[i++] = Int64GetDatum((int64) blkno); |
||||
|
||||
Assert(i == PG_CHECK_RELATION_COLS); |
||||
|
||||
/* Save the corrupted blocks in the tuplestore. */ |
||||
tuplestore_putvalues(tupstore, tupdesc, values, nulls); |
||||
} |
||||
|
||||
pfree(path); |
||||
|
||||
/* Pop the error context stack */ |
||||
error_context_stack = errcallback.previous; |
||||
} |
@ -1,231 +0,0 @@ |
||||
# Emulate on-disk corruptions of relation pages and find such corruptions |
||||
# using pg_relation_check_pages(). |
||||
|
||||
use strict; |
||||
use warnings; |
||||
|
||||
use PostgresNode; |
||||
use TestLib; |
||||
use Test::More tests => 20; |
||||
|
||||
our $CHECKSUM_UINT16_OFFSET = 4; |
||||
our $PD_UPPER_UINT16_OFFSET = 7; |
||||
our $BLOCKSIZE; |
||||
our $TOTAL_NB_ERR = 0; |
||||
|
||||
# Grab a relation page worth a size of BLOCKSIZE from given $filename.
# $blkno is the same block number as for a relation file.
#
# Returns the raw page as a scalar of $BLOCKSIZE bytes.  Dies if the file
# cannot be opened, positioned or read, or if fewer than $BLOCKSIZE bytes
# are available at that block.
sub read_page
{
	my ($filename, $blkno) = @_;
	my $block;

	open(my $infile, '<', $filename)
	  or die "could not open \"$filename\": $!";
	binmode($infile);

	# The fourth argument of read() is an offset into the destination
	# scalar, not into the file, so the file position has to be set
	# explicitly (whence 0 is SEEK_SET).
	seek($infile, $blkno * $BLOCKSIZE, 0)
	  or die "could not seek in \"$filename\": $!";

	my $nread = read($infile, $block, $BLOCKSIZE);
	die "could not read \"$filename\": $!" if !defined($nread);
	die "short read of block $blkno in \"$filename\""
	  if ($nread != $BLOCKSIZE);

	close($infile);

	return ($block);
}
||||
|
||||
# Update an existing page of size BLOCKSIZE with new contents in given
# $filename.  $blkno is the block number assigned in the relation file.
#
# The file is opened for update so that the other blocks it contains are
# left untouched.  Dies if the file cannot be opened, positioned, written
# in full, or closed.
sub write_page
{
	my ($filename, $block, $blkno) = @_;

	# '+<' opens read/write without truncating; '>' would wipe the whole
	# relation file before the page is written.
	open(my $outfile, '+<', $filename)
	  or die "could not open \"$filename\": $!";
	binmode($outfile);

	# The fourth argument of syswrite() is an offset into $block, not into
	# the file, so move to the target block first (whence 0 is SEEK_SET).
	# sysseek() returns "0 but true" for position zero.
	sysseek($outfile, $blkno * $BLOCKSIZE, 0)
	  or die "could not seek in \"$filename\": $!";

	my $nb = syswrite($outfile, $block, $BLOCKSIZE);

	die($!) if not defined $nb;
	die("Write error") if ($nb != $BLOCKSIZE);

	# syswrite() bypasses the buffered I/O layer, so there is nothing to
	# flush before closing.
	close($outfile)
	  or die "could not close \"$filename\": $!";
	return;
}
||||
|
||||
# Read 2 bytes from relation page at a given offset.
#
# $block is the raw page; $offset counts 16-bit words in native byte order
# (not bytes) from the start of the page.
sub get_uint16_from_page
{
	my ($block, $offset) = @_;

	# Decode the whole page as a list of native unsigned shorts, then
	# pick the requested one.
	my @words = unpack("S*", $block);

	return (@words)[$offset];
}
||||
|
||||
# Write 2 bytes to relation page at a given offset.
#
# $offset counts 16-bit words (not bytes) from the start of the page.  The
# function works on its own copy of $block and returns the modified copy.
sub set_uint16_to_page
{
	my ($block, $data, $offset) = @_;

	# Native-order byte representation of the value.
	my @bytes = unpack('C*', pack("S", $data));

	# vec() with elements of 16 bits or more stores them big-endian, so
	# copy the two bytes one at a time to keep the order pack() produced.
	foreach my $i (0 .. 1)
	{
		vec($block, (2 * $offset) + $i, 8) = $bytes[$i];
	}

	return $block;
}
||||
|
||||
# Sanity check on pg_stat_database: the sum of checksum_failures over all
# its rows must match the global counter $TOTAL_NB_ERR.  Runs one test,
# labelled with $test_prefix.
sub check_pg_stat_database
{
	my ($node, $test_prefix) = @_;

	my $query =
	    "SELECT "
	  . " sum(checksum_failures)"
	  . " FROM pg_catalog.pg_stat_database";
	my $failures = $node->safe_psql('postgres', $query);

	is($failures, $TOTAL_NB_ERR,
		"$test_prefix: pg_stat_database should have $TOTAL_NB_ERR error");

	return;
}
||||
|
||||
# Run a round of page checks for any relation present in this test run.
# $expected_broken is the psql output marking all the pages found as
# corrupted, using relname|blkno as format for each tuple returned.
# $num_checksum is the number of new checksum errors to add to the global
# counter that check_pg_stat_database() later compares with the contents of
# pg_stat_database.
#
# Note that this has no need to check system relations as these would have
# no corruptions: this test does not manipulate them and should by no means
# break the cluster.
sub run_page_checks
{
	my ($node, $num_checksum, $expected_broken, $test_prefix) = @_;

	# Only tables, indexes and materialized views with an OID of at least
	# 16384 are scanned.
	my $query =
	    "SELECT relname, failed_block_num"
	  . " FROM (SELECT relname, (pg_catalog.pg_relation_check_pages(oid)).*"
	  . "   FROM pg_class "
	  . "   WHERE relkind in ('r','i', 'm') AND oid >= 16384) AS s";
	my $broken = $node->safe_psql('postgres', $query);

	# Check command result
	is($broken, $expected_broken,
		"$test_prefix: output mismatch with pg_relation_check_pages()");

	$TOTAL_NB_ERR += $num_checksum;
	return;
}
||||
|
||||
# Perform various tests that modify block $blkno of $filename at the given
# offset (counted in 16-bit words), checking that a page corruption is
# correctly detected.  The original contents of the page are restored back
# once done.
# $broken_pages is the set of pages that are expected to be broken
# as of the returned result of pg_relation_check_pages().  $num_checksum
# is the number of checksum failures expected to be added to the contents
# of pg_stat_database after this function is done.
#
# The node is stopped and restarted twice, and nine tests are run.
sub corrupt_and_test_block
{
	my ($node, $filename, $blkno, $offset, $broken_pages, $num_checksum,
		$test_prefix)
	  = @_;
	my $fake_data = hex '0x0000';

	# Stop the server cleanly to flush any pages, and to prevent any
	# concurrent updates on what is going to be updated.
	$node->stop;
	my $original_block = read_page($filename, $blkno);
	my $original_data = get_uint16_from_page($original_block, $offset);

	isnt($original_data, $fake_data,
		"$test_prefix: fake data at offset $offset should be different from the existing one"
	);

	my $new_block = set_uint16_to_page($original_block, $fake_data, $offset);
	isnt(
		$original_data,
		get_uint16_from_page($new_block, $offset),
		"$test_prefix: The data at offset $offset should have been changed in memory"
	);

	# Every read and write below targets the requested block, not a
	# hard-coded block 0.
	write_page($filename, $new_block, $blkno);

	my $written_data =
	  get_uint16_from_page(read_page($filename, $blkno), $offset);

	# Some offline checks to validate that the corrupted data is in place.
	isnt($original_data, $written_data,
		"$test_prefix: data written at offset $offset should be different from the original one"
	);
	is( get_uint16_from_page($new_block, $offset),
		$written_data,
		"$test_prefix: data written at offset $offset should be the same as the one in memory"
	);
	is($written_data, $fake_data,
		"$test_prefix: The data written at offset $offset should be the one we wanted to write"
	);

	# The corruption is in place, start the server to run the checks.
	$node->start;
	run_page_checks($node, $num_checksum, $broken_pages, $test_prefix);

	# Stop the server, put the original page back in place.
	$node->stop;

	$new_block = set_uint16_to_page($original_block, $original_data, $offset);
	is( $original_data,
		get_uint16_from_page($new_block, $offset),
		"$test_prefix: data at offset $offset should have been restored in memory"
	);

	write_page($filename, $new_block, $blkno);
	is( $original_data,
		get_uint16_from_page(read_page($filename, $blkno), $offset),
		"$test_prefix: data at offset $offset should have been restored on disk"
	);

	# There should be no errors now that the contents are back in place.
	$node->start;
	run_page_checks($node, 0, '', $test_prefix);
}
||||
|
||||
# Data checksums are necessary for this test.
my $node = get_new_node('main');
$node->init(extra => ['--data-checksums']);
$node->start;

# The block size of the cluster drives every page read and write below.
$BLOCKSIZE =
  $node->safe_psql('postgres', "SELECT" . " current_setting('block_size')");

# Basic schema to corrupt and check
$node->safe_psql(
	'postgres', q|
	CREATE TABLE public.t1(id integer);
	INSERT INTO public.t1 SELECT generate_series(1, 100);
	CHECKPOINT;
|);

# Get the path to the relation file that will get manipulated by the
# follow-up tests with some on-disk corruptions.
my $filename = $node->safe_psql('postgres',
	    "SELECT"
	  . " current_setting('data_directory') || '/' || pg_relation_filepath('t1')"
);

# Normal case without corruptions, this passes, with pg_stat_database
# reporting no errors.
check_pg_stat_database($node, 'start');

# Test with a modified checksum.
corrupt_and_test_block($node, $filename, 0, $CHECKSUM_UINT16_OFFSET, 't1|0',
	1, 'broken checksum');

# Test a corruption that makes the block look like it satisfies PageIsNew().
corrupt_and_test_block($node, $filename, 0, $PD_UPPER_UINT16_OFFSET, 't1|0',
	0, 'new page');

# Check that the number of errors in pg_stat_database matches what we
# expect with the corruptions previously introduced.
check_pg_stat_database($node, 'end');
@ -1,72 +0,0 @@ |
||||
-- |
||||
-- Tests for functions related to relation pages |
||||
-- |
||||
-- Restricted to superusers by default |
||||
CREATE ROLE regress_pgfunc_user; |
||||
SET ROLE regress_pgfunc_user; |
||||
SELECT pg_relation_check_pages('pg_class'); -- error |
||||
ERROR: permission denied for function pg_relation_check_pages |
||||
SELECT pg_relation_check_pages('pg_class', 'main'); -- error |
||||
ERROR: permission denied for function pg_relation_check_pages |
||||
RESET ROLE; |
||||
DROP ROLE regress_pgfunc_user; |
||||
-- NULL and simple sanity checks |
||||
SELECT pg_relation_check_pages(NULL); -- empty result |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
SELECT pg_relation_check_pages(NULL, NULL); -- empty result |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
SELECT pg_relation_check_pages('pg_class', 'invalid_fork'); -- error |
||||
ERROR: invalid fork name |
||||
HINT: Valid fork names are "main", "fsm", "vm", and "init". |
||||
-- Relation types that are supported |
||||
CREATE TABLE pgfunc_test_tab (id int); |
||||
CREATE INDEX pgfunc_test_ind ON pgfunc_test_tab(id); |
||||
INSERT INTO pgfunc_test_tab VALUES (generate_series(1,1000)); |
||||
SELECT pg_relation_check_pages('pgfunc_test_tab'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
SELECT pg_relation_check_pages('pgfunc_test_ind'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
DROP TABLE pgfunc_test_tab; |
||||
CREATE MATERIALIZED VIEW pgfunc_test_matview AS SELECT 1; |
||||
SELECT pg_relation_check_pages('pgfunc_test_matview'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
DROP MATERIALIZED VIEW pgfunc_test_matview; |
||||
CREATE SEQUENCE pgfunc_test_seq; |
||||
SELECT pg_relation_check_pages('pgfunc_test_seq'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
DROP SEQUENCE pgfunc_test_seq; |
||||
-- pg_relation_check_pages() returns no results if passed relations that |
||||
-- do not support the operation, like relations without storage or temporary |
||||
-- relations. |
||||
CREATE TEMPORARY TABLE pgfunc_test_temp AS SELECT generate_series(1,10) AS a; |
||||
SELECT pg_relation_check_pages('pgfunc_test_temp'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
DROP TABLE pgfunc_test_temp; |
||||
CREATE VIEW pgfunc_test_view AS SELECT 1; |
||||
SELECT pg_relation_check_pages('pgfunc_test_view'); |
||||
pg_relation_check_pages |
||||
------------------------- |
||||
(0 rows) |
||||
|
||||
DROP VIEW pgfunc_test_view; |
@ -1,41 +0,0 @@ |
||||
-- |
||||
-- Tests for functions related to relation pages |
||||
-- |
||||
|
||||
-- Restricted to superusers by default |
||||
CREATE ROLE regress_pgfunc_user; |
||||
SET ROLE regress_pgfunc_user; |
||||
SELECT pg_relation_check_pages('pg_class'); -- error |
||||
SELECT pg_relation_check_pages('pg_class', 'main'); -- error |
||||
RESET ROLE; |
||||
DROP ROLE regress_pgfunc_user; |
||||
|
||||
-- NULL and simple sanity checks |
||||
SELECT pg_relation_check_pages(NULL); -- empty result |
||||
SELECT pg_relation_check_pages(NULL, NULL); -- empty result |
||||
SELECT pg_relation_check_pages('pg_class', 'invalid_fork'); -- error |
||||
|
||||
-- Relation types that are supported |
||||
CREATE TABLE pgfunc_test_tab (id int); |
||||
CREATE INDEX pgfunc_test_ind ON pgfunc_test_tab(id); |
||||
INSERT INTO pgfunc_test_tab VALUES (generate_series(1,1000)); |
||||
SELECT pg_relation_check_pages('pgfunc_test_tab'); |
||||
SELECT pg_relation_check_pages('pgfunc_test_ind'); |
||||
DROP TABLE pgfunc_test_tab; |
||||
|
||||
CREATE MATERIALIZED VIEW pgfunc_test_matview AS SELECT 1; |
||||
SELECT pg_relation_check_pages('pgfunc_test_matview'); |
||||
DROP MATERIALIZED VIEW pgfunc_test_matview; |
||||
CREATE SEQUENCE pgfunc_test_seq; |
||||
SELECT pg_relation_check_pages('pgfunc_test_seq'); |
||||
DROP SEQUENCE pgfunc_test_seq; |
||||
|
||||
-- pg_relation_check_pages() returns no results if passed relations that |
||||
-- do not support the operation, like relations without storage or temporary |
||||
-- relations. |
||||
CREATE TEMPORARY TABLE pgfunc_test_temp AS SELECT generate_series(1,10) AS a; |
||||
SELECT pg_relation_check_pages('pgfunc_test_temp'); |
||||
DROP TABLE pgfunc_test_temp; |
||||
CREATE VIEW pgfunc_test_view AS SELECT 1; |
||||
SELECT pg_relation_check_pages('pgfunc_test_view'); |
||||
DROP VIEW pgfunc_test_view; |
Loading…
Reference in new issue