@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / storage / buffer / bufmgr . c , v 1.219 2007 / 05 / 27 03 : 50 : 39 tgl Exp $
* $ PostgreSQL : pgsql / src / backend / storage / buffer / bufmgr . c , v 1.220 2007 / 05 / 30 20 : 11 : 58 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -90,11 +90,11 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
static Buffer ReadBuffer_common ( Relation reln , BlockNumber blockNum ,
bool zeroPage ) ;
static bool PinBuffer ( volatile BufferDesc * buf ) ;
bool zeroPage ,
BufferAccessStrategy strategy ) ;
static bool PinBuffer ( volatile BufferDesc * buf , BufferAccessStrategy strategy ) ;
static void PinBuffer_Locked ( volatile BufferDesc * buf ) ;
static void UnpinBuffer ( volatile BufferDesc * buf ,
bool fixOwner , bool normalAccess ) ;
static void UnpinBuffer ( volatile BufferDesc * buf , bool fixOwner ) ;
static bool SyncOneBuffer ( int buf_id , bool skip_pinned ) ;
static void WaitIO ( volatile BufferDesc * buf ) ;
static bool StartBufferIO ( volatile BufferDesc * buf , bool forInput ) ;
@ -102,6 +102,7 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits ) ;
static void buffer_write_error_callback ( void * arg ) ;
static volatile BufferDesc * BufferAlloc ( Relation reln , BlockNumber blockNum ,
BufferAccessStrategy strategy ,
bool * foundPtr ) ;
static void FlushBuffer ( volatile BufferDesc * buf , SMgrRelation reln ) ;
static void AtProcExit_Buffers ( int code , Datum arg ) ;
@ -125,7 +126,18 @@ static void AtProcExit_Buffers(int code, Datum arg);
Buffer
ReadBuffer ( Relation reln , BlockNumber blockNum )
{
return ReadBuffer_common ( reln , blockNum , false ) ;
return ReadBuffer_common ( reln , blockNum , false , NULL ) ;
}
/*
* ReadBufferWithStrategy - - same as ReadBuffer , except caller can specify
* a nondefault buffer access strategy . See buffer / README for details .
*/
Buffer
ReadBufferWithStrategy ( Relation reln , BlockNumber blockNum ,
BufferAccessStrategy strategy )
{
return ReadBuffer_common ( reln , blockNum , false , strategy ) ;
}
/*
@ -140,14 +152,15 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
Buffer
ReadOrZeroBuffer ( Relation reln , BlockNumber blockNum )
{
return ReadBuffer_common ( reln , blockNum , true ) ;
return ReadBuffer_common ( reln , blockNum , true , NULL ) ;
}
/*
* ReadBuffer_common - - common logic for ReadBuffer and ReadOrZeroBuffer
* ReadBuffer_common - - common logic for ReadBuffer variants
*/
static Buffer
ReadBuffer_common ( Relation reln , BlockNumber blockNum , bool zeroPage )
ReadBuffer_common ( Relation reln , BlockNumber blockNum , bool zeroPage ,
BufferAccessStrategy strategy )
{
volatile BufferDesc * bufHdr ;
Block bufBlock ;
@ -185,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
* lookup the buffer . IO_IN_PROGRESS is set if the requested block is
* not currently in memory .
*/
bufHdr = BufferAlloc ( reln , blockNum , & found ) ;
bufHdr = BufferAlloc ( reln , blockNum , strategy , & found ) ;
if ( found )
BufferHitCount + + ;
}
@ -330,6 +343,10 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
* buffer . If no buffer exists already , selects a replacement
* victim and evicts the old page , but does NOT read in new page .
*
* " strategy " can be a buffer replacement strategy object , or NULL for
* the default strategy . The selected buffer ' s usage_count is advanced when
* using the default strategy , but otherwise possibly not ( see PinBuffer ) .
*
* The returned buffer is pinned and is already marked as holding the
* desired page . If it already did have the desired page , * foundPtr is
* set TRUE . Otherwise , * foundPtr is set FALSE and the buffer is marked
@ -343,6 +360,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
static volatile BufferDesc *
BufferAlloc ( Relation reln ,
BlockNumber blockNum ,
BufferAccessStrategy strategy ,
bool * foundPtr )
{
BufferTag newTag ; /* identity of requested block */
@ -375,7 +393,7 @@ BufferAlloc(Relation reln,
*/
buf = & BufferDescriptors [ buf_id ] ;
valid = PinBuffer ( buf ) ;
valid = PinBuffer ( buf , strategy ) ;
/* Can release the mapping lock as soon as we've pinned it */
LWLockRelease ( newPartitionLock ) ;
@ -413,13 +431,15 @@ BufferAlloc(Relation reln,
/* Loop here in case we have to try another victim buffer */
for ( ; ; )
{
bool lock_held ;
/*
* Select a victim buffer . The buffer is returned with its header
* spinlock still held ! Also the BufFreelistLock is still held , since
* it would be bad to hold the spinlock while possibly waking up other
* processes .
* spinlock still held ! Also ( in most cases ) the BufFreelistLock is
* still held , since it would be bad to hold the spinlock while
* possibly waking up other processes .
*/
buf = StrategyGetBuffer ( ) ;
buf = StrategyGetBuffer ( strategy , & lock_held ) ;
Assert ( buf - > refcount = = 0 ) ;
@ -430,6 +450,7 @@ BufferAlloc(Relation reln,
PinBuffer_Locked ( buf ) ;
/* Now it's safe to release the freelist lock */
if ( lock_held )
LWLockRelease ( BufFreelistLock ) ;
/*
@ -458,16 +479,34 @@ BufferAlloc(Relation reln,
*/
if ( LWLockConditionalAcquire ( buf - > content_lock , LW_SHARED ) )
{
/*
* If using a nondefault strategy , and writing the buffer
* would require a WAL flush , let the strategy decide whether
* to go ahead and write / reuse the buffer or to choose another
* victim . We need lock to inspect the page LSN , so this
* can ' t be done inside StrategyGetBuffer .
*/
if ( strategy ! = NULL & &
XLogNeedsFlush ( BufferGetLSN ( buf ) ) & &
StrategyRejectBuffer ( strategy , buf ) )
{
/* Drop lock/pin and loop around for another buffer */
LWLockRelease ( buf - > content_lock ) ;
UnpinBuffer ( buf , true ) ;
continue ;
}
/* OK, do the I/O */
FlushBuffer ( buf , NULL ) ;
LWLockRelease ( buf - > content_lock ) ;
}
else
{
/*
* Someone else has pinned the buffer , so give it up and loop
* Someone else has locked the buffer , so give it up and loop
* back to get another one .
*/
UnpinBuffer ( buf , true , false /* evidently recently used */ ) ;
UnpinBuffer ( buf , true ) ;
continue ;
}
}
@ -531,10 +570,9 @@ BufferAlloc(Relation reln,
* Got a collision . Someone has already done what we were about to
* do . We ' ll just handle this as if it were found in the buffer
* pool in the first place . First , give up the buffer we were
* planning to use . Don ' t allow it to be thrown in the free list
* ( we don ' t want to hold freelist and mapping locks at once ) .
* planning to use .
*/
UnpinBuffer ( buf , true , false ) ;
UnpinBuffer ( buf , true ) ;
/* Can give up that buffer's mapping partition lock now */
if ( ( oldFlags & BM_TAG_VALID ) & &
@ -545,7 +583,7 @@ BufferAlloc(Relation reln,
buf = & BufferDescriptors [ buf_id ] ;
valid = PinBuffer ( buf ) ;
valid = PinBuffer ( buf , strategy ) ;
/* Can release the mapping lock as soon as we've pinned it */
LWLockRelease ( newPartitionLock ) ;
@ -595,20 +633,21 @@ BufferAlloc(Relation reln,
oldPartitionLock ! = newPartitionLock )
LWLockRelease ( oldPartitionLock ) ;
LWLockRelease ( newPartitionLock ) ;
UnpinBuffer ( buf , true , false /* evidently recently used */ ) ;
UnpinBuffer ( buf , true ) ;
}
/*
* Okay , it ' s finally safe to rename the buffer .
*
* Clearing BM_VALID here is necessary , clearing the dirtybits is just
* paranoia . We also clear the usage_count since any recency of use of
* the old content is no longer relevant .
* paranoia . We also reset the usage_count since any recency of use of
* the old content is no longer relevant . ( The usage_count starts out
* at 1 so that the buffer can survive one clock - sweep pass . )
*/
buf - > tag = newTag ;
buf - > flags & = ~ ( BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR ) ;
buf - > flags | = BM_TAG_VALID ;
buf - > usage_count = 0 ;
buf - > usage_count = 1 ;
UnlockBufHdr ( buf ) ;
@ -736,7 +775,7 @@ retry:
/*
* Insert the buffer at the head of the list of free buffers .
*/
StrategyFreeBuffer ( buf , true ) ;
StrategyFreeBuffer ( buf ) ;
}
/*
@ -814,9 +853,6 @@ ReleaseAndReadBuffer(Buffer buffer,
return buffer ;
ResourceOwnerForgetBuffer ( CurrentResourceOwner , buffer ) ;
LocalRefCount [ - buffer - 1 ] - - ;
if ( LocalRefCount [ - buffer - 1 ] = = 0 & &
bufHdr - > usage_count < BM_MAX_USAGE_COUNT )
bufHdr - > usage_count + + ;
}
else
{
@ -826,7 +862,7 @@ ReleaseAndReadBuffer(Buffer buffer,
if ( bufHdr - > tag . blockNum = = blockNum & &
RelFileNodeEquals ( bufHdr - > tag . rnode , relation - > rd_node ) )
return buffer ;
UnpinBuffer ( bufHdr , true , true ) ;
UnpinBuffer ( bufHdr , true ) ;
}
}
@ -836,6 +872,14 @@ ReleaseAndReadBuffer(Buffer buffer,
/*
* PinBuffer - - make buffer unavailable for replacement .
*
* For the default access strategy , the buffer ' s usage_count is incremented
* when we first pin it ; for other strategies we just make sure the usage_count
* isn ' t zero . ( The idea of the latter is that we don ' t want synchronized
* heap scans to inflate the count , but we need it to not be zero to discourage
* other backends from stealing buffers from our ring . As long as we cycle
* through the ring faster than the global clock - sweep cycles , buffers in
* our ring won ' t be chosen as victims for replacement by other backends . )
*
* This should be applied only to shared buffers , never local ones .
*
* Note that ResourceOwnerEnlargeBuffers must have been done already .
@ -844,7 +888,7 @@ ReleaseAndReadBuffer(Buffer buffer,
* some callers to avoid an extra spinlock cycle .
*/
static bool
PinBuffer ( volatile BufferDesc * buf )
PinBuffer ( volatile BufferDesc * buf , BufferAccessStrategy strategy )
{
int b = buf - > buf_id ;
bool result ;
@ -853,6 +897,16 @@ PinBuffer(volatile BufferDesc *buf)
{
LockBufHdr ( buf ) ;
buf - > refcount + + ;
if ( strategy = = NULL )
{
if ( buf - > usage_count < BM_MAX_USAGE_COUNT )
buf - > usage_count + + ;
}
else
{
if ( buf - > usage_count = = 0 )
buf - > usage_count = 1 ;
}
result = ( buf - > flags & BM_VALID ) ! = 0 ;
UnlockBufHdr ( buf ) ;
}
@ -872,6 +926,11 @@ PinBuffer(volatile BufferDesc *buf)
* PinBuffer_Locked - - as above , but caller already locked the buffer header .
* The spinlock is released before return .
*
* Currently , no callers of this function want to modify the buffer ' s
* usage_count at all , so there ' s no need for a strategy parameter .
* Also we don ' t bother with a BM_VALID test ( the caller could check that for
* itself ) .
*
* Note : use of this routine is frequently mandatory , not just an optimization
* to save a spin lock / unlock cycle , because we need to pin a buffer before
* its state can change under us .
@ -897,17 +956,9 @@ PinBuffer_Locked(volatile BufferDesc *buf)
*
* Most but not all callers want CurrentResourceOwner to be adjusted .
* Those that don ' t should pass fixOwner = FALSE .
*
* normalAccess indicates that we are finishing a " normal " page access ,
* that is , one requested by something outside the buffer subsystem .
* Passing FALSE means it ' s an internal access that should not update the
* buffer ' s usage count nor cause a change in the freelist .
*
* If we are releasing a buffer during VACUUM , and it ' s not been otherwise
* used recently , and normalAccess is true , we send the buffer to the freelist .
*/
static void
UnpinBuffer ( volatile BufferDesc * buf , bool fixOwner , bool normalAccess )
UnpinBuffer ( volatile BufferDesc * buf , bool fixOwner )
{
int b = buf - > buf_id ;
@ -919,8 +970,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
PrivateRefCount [ b ] - - ;
if ( PrivateRefCount [ b ] = = 0 )
{
bool immed_free_buffer = false ;
/* I'd better not still hold any locks on the buffer */
Assert ( ! LWLockHeldByMe ( buf - > content_lock ) ) ;
Assert ( ! LWLockHeldByMe ( buf - > io_in_progress_lock ) ) ;
@ -931,22 +980,7 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
Assert ( buf - > refcount > 0 ) ;
buf - > refcount - - ;
/* Update buffer usage info, unless this is an internal access */
if ( normalAccess )
{
if ( ! strategy_hint_vacuum )
{
if ( buf - > usage_count < BM_MAX_USAGE_COUNT )
buf - > usage_count + + ;
}
else
{
/* VACUUM accesses don't bump usage count, instead... */
if ( buf - > refcount = = 0 & & buf - > usage_count = = 0 )
immed_free_buffer = true ;
}
}
/* Support LockBufferForCleanup() */
if ( ( buf - > flags & BM_PIN_COUNT_WAITER ) & &
buf - > refcount = = 1 )
{
@ -959,14 +993,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
}
else
UnlockBufHdr ( buf ) ;
/*
* If VACUUM is releasing an otherwise - unused buffer , send it to the
* freelist for near - term reuse . We put it at the tail so that it
* won ' t be used before any invalid buffers that may exist .
*/
if ( immed_free_buffer )
StrategyFreeBuffer ( buf , false ) ;
}
}
@ -1150,7 +1176,7 @@ SyncOneBuffer(int buf_id, bool skip_pinned)
FlushBuffer ( bufHdr , NULL ) ;
LWLockRelease ( bufHdr - > content_lock ) ;
UnpinBuffer ( bufHdr , true , false /* don't change freelist */ ) ;
UnpinBuffer ( bufHdr , true ) ;
return true ;
}
@ -1266,7 +1292,7 @@ AtProcExit_Buffers(int code, Datum arg)
* here , it suggests that ResourceOwners are messed up .
*/
PrivateRefCount [ i ] = 1 ; /* make sure we release shared pin */
UnpinBuffer ( buf , false , false /* don't change freelist */ ) ;
UnpinBuffer ( buf , false ) ;
Assert ( PrivateRefCount [ i ] = = 0 ) ;
}
}
@ -1700,7 +1726,7 @@ FlushRelationBuffers(Relation rel)
LWLockAcquire ( bufHdr - > content_lock , LW_SHARED ) ;
FlushBuffer ( bufHdr , rel - > rd_smgr ) ;
LWLockRelease ( bufHdr - > content_lock ) ;
UnpinBuffer ( bufHdr , true , false /* no freelist change */ ) ;
UnpinBuffer ( bufHdr , true ) ;
}
else
UnlockBufHdr ( bufHdr ) ;
@ -1723,11 +1749,7 @@ ReleaseBuffer(Buffer buffer)
if ( BufferIsLocal ( buffer ) )
{
Assert ( LocalRefCount [ - buffer - 1 ] > 0 ) ;
bufHdr = & LocalBufferDescriptors [ - buffer - 1 ] ;
LocalRefCount [ - buffer - 1 ] - - ;
if ( LocalRefCount [ - buffer - 1 ] = = 0 & &
bufHdr - > usage_count < BM_MAX_USAGE_COUNT )
bufHdr - > usage_count + + ;
return ;
}
@ -1738,7 +1760,7 @@ ReleaseBuffer(Buffer buffer)
if ( PrivateRefCount [ buffer - 1 ] > 1 )
PrivateRefCount [ buffer - 1 ] - - ;
else
UnpinBuffer ( bufHdr , false , true ) ;
UnpinBuffer ( bufHdr , false ) ;
}
/*