@ -11,7 +11,7 @@
* Portions Copyright ( c ) 1996 - 2008 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
* $ PostgreSQL : pgsql / src / backend / access / transam / xlogutils . c , v 1.54 2008 / 06 / 08 22 : 00 : 47 alvherre Exp $
* $ PostgreSQL : pgsql / src / backend / access / transam / xlogutils . c , v 1.55 2008 / 06 / 12 09 : 12 : 30 heikki Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -190,6 +190,9 @@ XLogCheckInvalidPages(void)
if ( foundone )
elog ( PANIC , " WAL contains references to invalid pages " ) ;
hash_destroy ( invalid_page_tab ) ;
invalid_page_tab = NULL ;
}
@ -218,27 +221,40 @@ XLogCheckInvalidPages(void)
* at the end of WAL replay . )
*/
Buffer
XLogReadBuffer ( Relation reln , BlockNumber blkno , bool init )
XLogReadBuffer ( RelFileNode rnode , BlockNumber blkno , bool init )
{
BlockNumber lastblock = RelationGetNumberOfBlocks ( reln ) ;
BlockNumber lastblock ;
Buffer buffer ;
SMgrRelation smgr ;
Assert ( blkno ! = P_NEW ) ;
/* Open the relation at smgr level */
smgr = smgropen ( rnode ) ;
/*
* Create the target file if it doesn ' t already exist . This lets us cope
* if the replay sequence contains writes to a relation that is later
* deleted . ( The original coding of this routine would instead suppress
* the writes , but that seems like it risks losing valuable data if the
* filesystem loses an inode during a crash . Better to write the data
* until we are actually told to delete the file . )
*/
smgrcreate ( smgr , false , true ) ;
lastblock = smgrnblocks ( smgr ) ;
if ( blkno < lastblock )
{
/* page exists in file */
if ( init )
buffer = ReadOrZeroBuffer ( reln , blkno ) ;
else
buffer = ReadBuffer ( reln , blkno ) ;
buffer = ReadBufferWithoutRelcache ( rnode , false , blkno , init ) ;
}
else
{
/* hm, page doesn't exist in file */
if ( ! init )
{
log_invalid_page ( reln - > rd_node , blkno , false ) ;
log_invalid_page ( rnode , blkno , false ) ;
return InvalidBuffer ;
}
/* OK to extend the file */
@ -249,7 +265,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
{
if ( buffer ! = InvalidBuffer )
ReleaseBuffer ( buffer ) ;
buffer = ReadBuffer ( reln , P_NEW ) ;
buffer = ReadBufferWithoutRelcache ( rnod e , false , P_NEW , false ) ;
lastblock + + ;
}
Assert ( BufferGetBlockNumber ( buffer ) = = blkno ) ;
@ -265,7 +281,7 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
if ( PageIsNew ( ( PageHeader ) page ) )
{
UnlockReleaseBuffer ( buffer ) ;
log_invalid_page ( reln - > rd_ node , blkno , true ) ;
log_invalid_page ( rnode , blkno , true ) ;
return InvalidBuffer ;
}
}
@ -275,226 +291,81 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
/*
* Lightweight " Relation " cache - - - this substitutes for the normal relcache
* during XLOG replay .
* Struct actually returned by XLogFakeRelcacheEntry , though the declared
* return type is Relation .
*/
typedef struct XLogRelDesc
{
RelationData reldata ;
struct XLogRelDesc * lessRecently ;
struct XLogRelDesc * moreRecently ;
} XLogRelDesc ;
typedef struct XLogRelCacheEntry
typedef struct
{
RelFileNode rnode ;
XLogRelDesc * rdes c;
} XLogRelCacheEntry ;
RelationData reldata ; /* Note: this must be first */
FormData_pg_class pgc ;
} FakeRelCacheEntryData ;
static HTAB * _xlrelcache ;
static XLogRelDesc * _xlrelarr = NULL ;
static Form_pg_class _xlpgcarr = NULL ;
static int _xlast = 0 ;
static int _xlcnt = 0 ;
typedef FakeRelCacheEntryData * FakeRelCacheEntry ;
# define _XLOG_RELCACHESIZE 512
static void
_xl_init_rel_cache ( void )
{
HASHCTL ctl ;
_xlcnt = _XLOG_RELCACHESIZE ;
_xlast = 0 ;
_xlrelarr = ( XLogRelDesc * ) malloc ( sizeof ( XLogRelDesc ) * _xlcnt ) ;
memset ( _xlrelarr , 0 , sizeof ( XLogRelDesc ) * _xlcnt ) ;
_xlpgcarr = ( Form_pg_class ) malloc ( sizeof ( FormData_pg_class ) * _xlcnt ) ;
memset ( _xlpgcarr , 0 , sizeof ( FormData_pg_class ) * _xlcnt ) ;
_xlrelarr [ 0 ] . moreRecently = & ( _xlrelarr [ 0 ] ) ;
_xlrelarr [ 0 ] . lessRecently = & ( _xlrelarr [ 0 ] ) ;
memset ( & ctl , 0 , sizeof ( ctl ) ) ;
ctl . keysize = sizeof ( RelFileNode ) ;
ctl . entrysize = sizeof ( XLogRelCacheEntry ) ;
ctl . hash = tag_hash ;
_xlrelcache = hash_create ( " XLOG relcache " , _XLOG_RELCACHESIZE ,
& ctl , HASH_ELEM | HASH_FUNCTION ) ;
}
static void
_xl_remove_hash_entry ( XLogRelDesc * rdesc )
{
Form_pg_class tpgc = rdesc - > reldata . rd_rel ;
XLogRelCacheEntry * hentry ;
rdesc - > lessRecently - > moreRecently = rdesc - > moreRecently ;
rdesc - > moreRecently - > lessRecently = rdesc - > lessRecently ;
hentry = ( XLogRelCacheEntry * ) hash_search ( _xlrelcache ,
( void * ) & ( rdesc - > reldata . rd_node ) , HASH_REMOVE , NULL ) ;
if ( hentry = = NULL )
elog ( PANIC , " _xl_remove_hash_entry: file was not found in cache " ) ;
RelationCloseSmgr ( & ( rdesc - > reldata ) ) ;
memset ( rdesc , 0 , sizeof ( XLogRelDesc ) ) ;
memset ( tpgc , 0 , sizeof ( FormData_pg_class ) ) ;
rdesc - > reldata . rd_rel = tpgc ;
}
static XLogRelDesc *
_xl_new_reldesc ( void )
{
XLogRelDesc * res ;
_xlast + + ;
if ( _xlast < _xlcnt )
{
_xlrelarr [ _xlast ] . reldata . rd_rel = & ( _xlpgcarr [ _xlast ] ) ;
return & ( _xlrelarr [ _xlast ] ) ;
}
/* reuse */
res = _xlrelarr [ 0 ] . moreRecently ;
_xl_remove_hash_entry ( res ) ;
_xlast - - ;
return res ;
}
void
XLogInitRelationCache ( void )
{
_xl_init_rel_cache ( ) ;
invalid_page_tab = NULL ;
}
void
XLogCloseRelationCache ( void )
/*
* Create a fake relation cache entry for a physical relation
*
* It ' s often convenient to use the same functions in XLOG replay as in the
* main codepath , but those functions typically work with a relcache entry .
* We don ' t have a working relation cache during XLOG replay , but this
* function can be used to create a fake relcache entry instead . Only the
* fields related to physical storage , like rd_rel , are initialized , so the
* fake entry is only usable in low - level operations like ReadBuffer ( ) .
*
* Caller must free the returned entry with FreeFakeRelcacheEntry ( ) .
*/
Relation
CreateFakeRelcacheEntry ( RelFileNode rnode )
{
HASH_SEQ_STATUS status ;
XLogRelCacheEntry * hentry ;
FakeRelCacheEntry fakeentry ;
Relation rel ;
if ( ! _xlrelarr )
return ;
/* Allocate the Relation struct and all related space in one block. */
fakeentry = palloc0 ( sizeof ( FakeRelCacheEntryData ) ) ;
rel = ( Relation ) fakeentry ;
hash_seq_init ( & status , _xlrelcache ) ;
rel - > rd_rel = & fakeentry - > pgc ;
rel - > rd_node = rnode ;
while ( ( hentry = ( XLogRelCacheEntry * ) hash_seq_search ( & status ) ) ! = NULL )
_xl_remove_hash_entry ( hentry - > rdesc ) ;
/* We don't know the name of the relation; use relfilenode instead */
sprintf ( RelationGetRelationName ( rel ) , " %u " , rnode . relNode ) ;
hash_destroy ( _xlrelcache ) ;
/*
* We set up the lockRelId in case anything tries to lock the dummy
* relation . Note that this is fairly bogus since relNode may be
* different from the relation ' s OID . It shouldn ' t really matter
* though , since we are presumably running by ourselves and can ' t have
* any lock conflicts . . .
*/
rel - > rd_lockInfo . lockRelId . dbId = rnode . dbNode ;
rel - > rd_lockInfo . lockRelId . relId = rnode . relNode ;
free ( _xlrelarr ) ;
free ( _xlpgcarr ) ;
rel - > rd_targblock = InvalidBlockNumber ;
rel - > rd_smgr = NULL ;
_xlrelarr = NULL ;
return rel ;
}
/*
* Open a relation during XLOG replay
*
* Note : this once had an API that allowed NULL return on failure , but it
* no longer does ; any failure results in elog ( ) .
* Free a fake relation cache entry .
*/
Relation
XLogOpenRelation ( RelFileNode rnode )
void
FreeFakeRelcacheEntry ( Relation fakerel )
{
XLogRelDesc * res ;
XLogRelCacheEntry * hentry ;
bool found ;
hentry = ( XLogRelCacheEntry * )
hash_search ( _xlrelcache , ( void * ) & rnode , HASH_FIND , NULL ) ;
if ( hentry )
{
res = hentry - > rdesc ;
res - > lessRecently - > moreRecently = res - > moreRecently ;
res - > moreRecently - > lessRecently = res - > lessRecently ;
}
else
{
res = _xl_new_reldesc ( ) ;
sprintf ( RelationGetRelationName ( & ( res - > reldata ) ) , " %u " , rnode . relNode ) ;
res - > reldata . rd_node = rnode ;
/*
* We set up the lockRelId in case anything tries to lock the dummy
* relation . Note that this is fairly bogus since relNode may be
* different from the relation ' s OID . It shouldn ' t really matter
* though , since we are presumably running by ourselves and can ' t have
* any lock conflicts . . .
*/
res - > reldata . rd_lockInfo . lockRelId . dbId = rnode . dbNode ;
res - > reldata . rd_lockInfo . lockRelId . relId = rnode . relNode ;
hentry = ( XLogRelCacheEntry * )
hash_search ( _xlrelcache , ( void * ) & rnode , HASH_ENTER , & found ) ;
if ( found )
elog ( PANIC , " xlog relation already present on insert into cache " ) ;
hentry - > rdesc = res ;
res - > reldata . rd_targblock = InvalidBlockNumber ;
res - > reldata . rd_smgr = NULL ;
RelationOpenSmgr ( & ( res - > reldata ) ) ;
/*
* Create the target file if it doesn ' t already exist . This lets us
* cope if the replay sequence contains writes to a relation that is
* later deleted . ( The original coding of this routine would instead
* return NULL , causing the writes to be suppressed . But that seems
* like it risks losing valuable data if the filesystem loses an inode
* during a crash . Better to write the data until we are actually
* told to delete the file . )
*/
smgrcreate ( res - > reldata . rd_smgr , res - > reldata . rd_istemp , true ) ;
}
res - > moreRecently = & ( _xlrelarr [ 0 ] ) ;
res - > lessRecently = _xlrelarr [ 0 ] . lessRecently ;
_xlrelarr [ 0 ] . lessRecently = res ;
res - > lessRecently - > moreRecently = res ;
return & ( res - > reldata ) ;
pfree ( fakerel ) ;
}
/*
* Drop a relation during XLOG replay
*
* This is called when the relation is about to be deleted ; we need to ensure
* that there is no dangling smgr reference in the xlog relation cache .
*
* Currently , we don ' t bother to physically remove the relation from the
* cache , we just let it age out normally .
*
* This also takes care of removing any open " invalid-page " records for
* the relation .
* This is called when the relation is about to be deleted ; we need to remove
* any open " invalid-page " records for the relation .
*/
void
XLogDropRelation ( RelFileNode rnode )
{
XLogRelCacheEntry * hentry ;
hentry = ( XLogRelCacheEntry * )
hash_search ( _xlrelcache , ( void * ) & rnode , HASH_FIND , NULL ) ;
if ( hentry )
{
XLogRelDesc * rdesc = hentry - > rdesc ;
RelationCloseSmgr ( & ( rdesc - > reldata ) ) ;
}
/* Tell smgr to forget about this relation as well */
smgrclosenode ( rnode ) ;
forget_invalid_pages ( rnode , 0 ) ;
}
@ -507,18 +378,14 @@ XLogDropRelation(RelFileNode rnode)
void
XLogDropDatabase ( Oid dbid )
{
HASH_SEQ_STATUS status ;
XLogRelCacheEntry * hentry ;
hash_seq_init ( & status , _xlrelcache ) ;
while ( ( hentry = ( XLogRelCacheEntry * ) hash_seq_search ( & status ) ) ! = NULL )
{
XLogRelDesc * rdesc = hentry - > rdesc ;
if ( hentry - > rnode . dbNode = = dbid )
RelationCloseSmgr ( & ( rdesc - > reldata ) ) ;
}
/*
* This is unnecessarily heavy - handed , as it will close SMgrRelation
* objects for other databases as well . DROP DATABASE occurs seldom
* enough that it ' s not worth introducing a variant of smgrclose for
* just this purpose . XXX : Or should we rather leave the smgr entries
* dangling ?
*/
smgrcloseall ( ) ;
forget_invalid_pages_db ( dbid ) ;
}
@ -526,8 +393,7 @@ XLogDropDatabase(Oid dbid)
/*
* Truncate a relation during XLOG replay
*
* We don ' t need to do anything to the fake relcache , but we do need to
* clean up any open " invalid-page " records for the dropped pages .
* We need to clean up any open " invalid-page " records for the dropped pages .
*/
void
XLogTruncateRelation ( RelFileNode rnode , BlockNumber nblocks )