@ -2481,10 +2481,10 @@ again:
/*
* If the buffer was dirty , try to write it out . There is a race
* condition here , in that someone might dirty it after we released the
* buffer header lock above, or even while we are writing it out (since
* our share - lock won ' t prevent hint - bit updates ) . We will recheck the
* dirty bit after re - locking the buffer header .
* condition here , another backend could dirty the buffer between
* StrategyGetBuffer() checking that it is not in use and invalidating the
* buffer below . That ' s addressed by InvalidateVictimBuffer ( ) verifying
* that the buffer is not dirty .
*/
if ( buf_state & BM_DIRTY )
{
@ -2492,20 +2492,20 @@ again:
Assert ( buf_state & BM_VALID ) ;
/*
* We need a share - lock on the buffer contents to write it out ( else
* we might write invalid data , eg because someone else is compacting
* the page contents while we write ) . We must use a conditional lock
* acquisition here to avoid deadlock . Even though the buffer was not
* pinned ( and therefore surely not locked ) when StrategyGetBuffer
* returned it , someone else could have pinned and exclusive - locked it
* by the time we get here . If we try to get the lock unconditionally ,
* we ' d block waiting for them ; if they later block waiting for us ,
* deadlock ensues . ( This has been observed to happen when two
* backends are both trying to split btree index pages , and the second
* one just happens to be trying to split the page the first one got
* from StrategyGetBuffer . )
* We need a share - exclusive lock on the buffer contents to write it
* out ( else we might write invalid data , eg because someone else is
* compacting the page contents while we write ) . We must use a
* conditional lock acquisition here to avoid deadlock . Even though
* the buffer was not pinned ( and therefore surely not locked ) when
* StrategyGetBuffer returned it , someone else could have pinned and
* ( share - ) exclusive - locked it by the time we get here . If we try to
* get the lock unconditionally , we ' d block waiting for them ; if they
* later block waiting for us , deadlock ensues . ( This has been
* observed to happen when two backends are both trying to split btree
* index pages , and the second one just happens to be trying to split
* the page the first one got from StrategyGetBuffer . )
*/
if ( ! BufferLockConditional ( buf , buf_hdr , BUFFER_LOCK_SHARE ) )
if ( ! BufferLockConditional ( buf , buf_hdr , BUFFER_LOCK_SHARE_EXCLUSIVE ) )
{
/*
* Someone else has locked the buffer , so give it up and loop back
@ -2518,18 +2518,14 @@ again:
/*
* If using a nondefault strategy , and writing the buffer would
* require a WAL flush , let the strategy decide whether to go ahead
* and write / reuse the buffer or to choose another victim . We need a
* lock to inspect the page LSN , so this can ' t be done inside
* and write / reuse the buffer or to choose another victim . We need to
* hold the content lock in at least share - exclusive mode to safely
* inspect the page LSN , so this couldn ' t have been done inside
* StrategyGetBuffer .
*/
if ( strategy ! = NULL )
{
XLogRecPtr lsn ;
/* Read the LSN while holding buffer header lock */
buf_state = LockBufHdr ( buf_hdr ) ;
lsn = BufferGetLSN ( buf_hdr ) ;
UnlockBufHdr ( buf_hdr ) ;
XLogRecPtr lsn = BufferGetLSN ( buf_hdr ) ;
if ( XLogNeedsFlush ( lsn )
& & StrategyRejectBuffer ( strategy , buf_hdr , from_ring ) )
@ -3019,7 +3015,7 @@ BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
*
* Checks if buffer is already dirty .
*
* Buffer must be pinned and exclusive - locked . ( Without an exclusive lock ,
* Buffer must be pinned and [ share - ] exclusive - locked . ( Without such a lock ,
* the result may be stale before it ' s returned . )
*/
bool
@ -3039,7 +3035,8 @@ BufferIsDirty(Buffer buffer)
else
{
bufHdr = GetBufferDescriptor ( buffer - 1 ) ;
Assert ( BufferIsLockedByMeInMode ( buffer , BUFFER_LOCK_EXCLUSIVE ) ) ;
Assert ( BufferIsLockedByMeInMode ( buffer , BUFFER_LOCK_SHARE_EXCLUSIVE ) | |
BufferIsLockedByMeInMode ( buffer , BUFFER_LOCK_EXCLUSIVE ) ) ;
}
return pg_atomic_read_u64 ( & bufHdr - > state ) & BM_DIRTY ;
@ -4074,8 +4071,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
}
/*
* Pin it , share - lock it , write it . ( FlushBuffer will do nothing if the
* buffer is clean by the time we ' ve locked it . )
* Pin it , share - exclusive - lock it , write it . ( FlushBuffer will do
* nothing if the buffer is clean by the time we ' ve locked it . )
*/
PinBuffer_Locked ( bufHdr ) ;
@ -4405,11 +4402,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
* However , we will need to force the changes to disk via fsync before
* we can checkpoint WAL .
*
* The caller must hold a pin on the buffer and have share - locked the
* buffer contents . ( Note : a share - lock does not prevent updates of
* hint bits in the buffer , so the page could change while the write
* is in progress , but we assume that that will not invalidate the data
* written . )
* The caller must hold a pin on the buffer and have
* ( share - ) exclusively - locked the buffer contents .
*
* If the caller has an smgr reference for the buffer ' s relation , pass it
* as the second parameter . If not , pass NULL .
@ -4425,6 +4419,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
char * bufToWrite ;
uint64 buf_state ;
Assert ( BufferLockHeldByMeInMode ( buf , BUFFER_LOCK_EXCLUSIVE ) | |
BufferLockHeldByMeInMode ( buf , BUFFER_LOCK_SHARE_EXCLUSIVE ) ) ;
/*
* Try to start an I / O operation . If StartBufferIO returns false , then
* someone else flushed the buffer before we could , so we need not do
@ -4452,8 +4449,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
buf_state = LockBufHdr ( buf ) ;
/*
* Run PageGetLSN while holding header lock , since we don ' t have the
* buffer locked exclusively in all cases .
* As we hold at least a share - exclusive lock on the buffer , the LSN
* cannot change during the flush ( and thus can ' t be torn ) .
*/
recptr = BufferGetLSN ( buf ) ;
@ -4557,7 +4554,7 @@ FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln,
{
Buffer buffer = BufferDescriptorGetBuffer ( buf ) ;
BufferLockAcquire ( buffer , buf , BUFFER_LOCK_SHARE ) ;
BufferLockAcquire ( buffer , buf , BUFFER_LOCK_SHARE_EXCLUSIVE ) ;
FlushBuffer ( buf , reln , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
BufferLockUnlock ( buffer , buf ) ;
}
@ -4629,8 +4626,9 @@ BufferIsPermanent(Buffer buffer)
/*
* BufferGetLSNAtomic
* Retrieves the LSN of the buffer atomically using a buffer header lock .
* This is necessary for some callers who may not have an exclusive lock
* on the buffer .
* This is necessary for some callers who may only hold a share lock on
* the buffer . A share lock allows a concurrent backend to set hint bits
* on the page , which in turn may require a WAL record to be emitted .
*/
XLogRecPtr
BufferGetLSNAtomic ( Buffer buffer )
@ -5476,8 +5474,8 @@ FlushDatabaseBuffers(Oid dbid)
}
/*
* Flush a previously , shared or exclusively , locked and pinned buffer to the
* OS .
* Flush a previously , share - exclusively or exclusively , locked and pinned
* buffer to the OS .
*/
void
FlushOneBuffer ( Buffer buffer )
@ -5550,56 +5548,38 @@ IncrBufferRefCount(Buffer buffer)
}
/*
* MarkBufferDirtyHint
*
* Mark a buffer dirty for non - critical changes .
* Shared - buffer only helper for MarkBufferDirtyHint ( ) and
* BufferSetHintBits16 ( ) .
*
* This is essentially the same as MarkBufferDirty , except :
*
* 1. The caller does not write WAL ; so if checksums are enabled , we may need
* to write an XLOG_FPI_FOR_HINT WAL record to protect against torn pages .
* 2. The caller might have only share - lock instead of exclusive - lock on the
* buffer ' s content lock .
* 3. This function does not guarantee that the buffer is always marked dirty
* ( due to a race condition ) , so it cannot be used for important changes .
* This is separated out because it turns out that the repeated checks for
* local buffers , repeated GetBufferDescriptor ( ) and repeated reading of the
* buffer ' s state sufficiently hurts the performance of BufferSetHintBits16 ( ) .
*/
void
MarkBufferDirtyHint ( Buffer buffer , bool buffer_std )
static inline void
MarkSharedBufferDirtyHint ( Buffer buffer , BufferDesc * bufHdr , uint64 lockstate ,
bool buffer_std )
{
BufferDesc * bufHdr ;
Page page = BufferGetPage ( buffer ) ;
if ( ! BufferIsValid ( buffer ) )
elog ( ERROR , " bad buffer ID: %d " , buffer ) ;
if ( BufferIsLocal ( buffer ) )
{
MarkLocalBufferDirty ( buffer ) ;
return ;
}
bufHdr = GetBufferDescriptor ( buffer - 1 ) ;
Assert ( GetPrivateRefCount ( buffer ) > 0 ) ;
/* here, either share or exclusive lock is OK */
Assert ( BufferIsLockedByMe ( buffer ) ) ;
/* here, either share-exclusive or exclusive lock is OK */
Assert ( BufferLockHeldByMeInMode ( bufHdr , BUFFER_LOCK_EXCLUSIVE ) | |
BufferLockHeldByMeInMode ( bufHdr , BUFFER_LOCK_SHARE_EXCLUSIVE ) ) ;
/*
* This routine might get called many times on the same page , if we are
* making the first scan after commit of an xact that added / deleted many
* tuples . So , be as quick as we can if the buffer is already dirty . We
* do this by not acquiring spinlock if it looks like the status bits are
* already set . Since we make this test unlocked , there ' s a chance we
* might fail to notice that the flags have just been cleared , and failed
* to reset them , due to memory - ordering issues . But since this function
* is only intended to be used in cases where failing to write out the
* data would be harmless anyway , it doesn ' t really matter .
* tuples . So , be as quick as we can if the buffer is already dirty .
*
* As we are holding ( at least ) a share - exclusive lock , nobody could have
* cleaned or dirtied the page concurrently , so we can just rely on the
* previously fetched value here without any danger of races .
*/
if ( ( pg_atomic_read_u64 ( & bufHdr - > state ) & ( BM_DIRTY | BM_JUST_DIRTIED ) ) ! =
( BM_DIRTY | BM_JUST_DIRTIED ) )
if ( unlikely ( ( lockstate & ( BM_DIRTY | BM_JUST_DIRTIED ) ) ! =
( BM_DIRTY | BM_JUST_DIRTIED ) ) )
{
XLogRecPtr lsn = InvalidXLogRecPtr ;
bool dirtied = false ;
bool delayChkptFlags = false ;
uint64 buf_state ;
@ -5612,8 +5592,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
* We don ' t check full_page_writes here because that logic is included
* when we call XLogInsert ( ) since the value changes dynamically .
*/
if ( XLogHintBitIsNeeded ( ) & &
( pg_atomic_read_u64 ( & bufHdr - > state ) & BM_PERMANENT ) )
if ( XLogHintBitIsNeeded ( ) & & ( lockstate & BM_PERMANENT ) )
{
/*
* If we must not write WAL , due to a relfilelocator - specific
@ -5658,27 +5637,29 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
buf_state = LockBufHdr ( bufHdr ) ;
/*
* It should not be possible for the buffer to already be dirty , see
* comment above .
*/
Assert ( ! ( buf_state & BM_DIRTY ) ) ;
Assert ( BUF_STATE_GET_REFCOUNT ( buf_state ) > 0 ) ;
if ( ! ( buf_state & BM_DIRTY ) )
if ( XLogRecPtrIsValid ( lsn ) )
{
dirtied = true ; /* Means "will be dirtied by this action" */
/*
* Set the page LSN if we wrote a backup block . We aren ' t supposed
* to set this when only holding a share lock but as long as we
* serialise it somehow we ' re OK . We choose to set LSN while
* holding the buffer header lock , which causes any reader of an
* LSN who holds only a share lock to also obtain a buffer header
* lock before using PageGetLSN ( ) , which is enforced in
* BufferGetLSNAtomic ( ) .
* Set the page LSN if we wrote a backup block . To allow backends
* that only hold a share lock on the buffer to read the LSN in a
* tear - free manner , we set the page LSN while holding the buffer
* header lock . This allows any reader of an LSN who holds only a
* share lock to also obtain a buffer header lock before using
* PageGetLSN ( ) to read the LSN in a tear free way . This is done
* in BufferGetLSNAtomic ( ) .
*
* If checksums are enabled , you might think we should reset the
* checksum here . That will happen when the page is written
* sometime later in this checkpoint cycle .
*/
if ( XLogRecPtrIsValid ( lsn ) )
PageSetLSN ( page , lsn ) ;
PageSetLSN ( page , lsn ) ;
}
UnlockBufHdrExt ( bufHdr , buf_state ,
@ -5688,15 +5669,48 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
if ( delayChkptFlags )
MyProc - > delayChkptFlags & = ~ DELAY_CHKPT_START ;
if ( dirtied )
{
pgBufferUsage . shared_blks_dirtied + + ;
if ( VacuumCostActive )
VacuumCostBalance + = VacuumCostPageDirty ;
}
pgBufferUsage . shared_blks_dirtied + + ;
if ( VacuumCostActive )
VacuumCostBalance + = VacuumCostPageDirty ;
}
}
/*
 * MarkBufferDirtyHint
 *
 * Mark a buffer dirty for non-critical changes.
 *
 * This is essentially the same as MarkBufferDirty, except:
 *
 * 1. The caller does not write WAL; so if checksums are enabled, we may need
 *    to write an XLOG_FPI_FOR_HINT WAL record to protect against torn pages.
 * 2. The caller might have only a share-exclusive-lock instead of an
 *    exclusive-lock on the buffer's content lock.
 * 3. This function does not guarantee that the buffer is always marked dirty
 *    (it e.g. can't always on a hot standby), so it cannot be used for
 *    important changes.
 */
inline void
MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
{
	BufferDesc *bufHdr;

	if (!BufferIsValid(buffer))
		elog(ERROR, "bad buffer ID: %d", buffer);

	if (BufferIsLocal(buffer))
	{
		MarkLocalBufferDirty(buffer);
		return;
	}

	/*
	 * Only look up the descriptor once we know we have a valid shared
	 * buffer: computing GetBufferDescriptor(buffer - 1) for an invalid or
	 * local (negative) buffer ID would form a pointer outside the shared
	 * buffer descriptor array.
	 */
	bufHdr = GetBufferDescriptor(buffer - 1);

	MarkSharedBufferDirtyHint(buffer, bufHdr,
							  pg_atomic_read_u64(&bufHdr->state),
							  buffer_std);
}
/*
* Release buffer content locks for shared buffers .
*
@ -6798,6 +6812,192 @@ IsBufferCleanupOK(Buffer buffer)
return false ;
}
/*
 * Helper for BufferBeginSetHintBits() and BufferSetHintBits16().
 *
 * Checks whether the caller's current content lock mode already permits
 * setting hint bits and, if the caller holds only a share lock, tries to
 * upgrade it to share-exclusive.
 *
 * Returns true and stores the buffer's state word in *lockstate if hint
 * bits may be set; returns false (leaving *lockstate untouched) if a
 * conflicting lock held by another backend prevents the upgrade.
 */
static inline bool
SharedBufferBeginSetHintBits(Buffer buffer, BufferDesc *buf_hdr, uint64 *lockstate)
{
	PrivateRefCountEntry *ref;
	BufferLockMode mode;
	uint64		cur_state;

	ref = GetPrivateRefCountEntry(buffer, true);
	if (ref == NULL)
		elog(ERROR, "buffer is not pinned");

	mode = ref->data.lockmode;
	if (mode == BUFFER_LOCK_UNLOCK)
		elog(ERROR, "buffer is not locked");

	/* an exclusive or share-exclusive lock already suffices */
	if (mode == BUFFER_LOCK_EXCLUSIVE || mode == BUFFER_LOCK_SHARE_EXCLUSIVE)
	{
		*lockstate = pg_atomic_read_u64(&buf_hdr->state);
		return true;
	}

	/* otherwise we hold a plain share lock; attempt the upgrade via CAS */
	Assert(mode == BUFFER_LOCK_SHARE);

	cur_state = pg_atomic_read_u64(&buf_hdr->state);
	for (;;)
	{
		uint64		next_state = cur_state;

		/*
		 * The upgrade is impossible while some other backend holds the lock
		 * in exclusive or share-exclusive mode.
		 */
		if (unlikely((cur_state & (BM_LOCK_VAL_EXCLUSIVE | BM_LOCK_VAL_SHARE_EXCLUSIVE)) != 0))
			return false;

		/* trade our share lock for a share-exclusive one */
		next_state -= BM_LOCK_VAL_SHARED;
		next_state += BM_LOCK_VAL_SHARE_EXCLUSIVE;

		if (likely(pg_atomic_compare_exchange_u64(&buf_hdr->state,
												  &cur_state, next_state)))
		{
			ref->data.lockmode = BUFFER_LOCK_SHARE_EXCLUSIVE;
			*lockstate = next_state;
			return true;
		}

		/* CAS failed; cur_state has been refreshed, loop and retry */
	}
}
/*
 * Try to acquire the right to set hint bits on the buffer.
 *
 * To be allowed to set hint bits, this backend needs to hold either a
 * share-exclusive or an exclusive lock. In case this backend only holds a
 * share lock, this function will try to upgrade the lock to
 * share-exclusive. The caller is only allowed to set hint bits if true is
 * returned.
 *
 * Once BufferBeginSetHintBits() has returned true, hint bits may be set
 * without further calls to BufferBeginSetHintBits(), until the buffer is
 * unlocked.
 *
 * Requiring a share-exclusive lock to set hint bits prevents setting hint
 * bits on buffers that are currently being written out, which could corrupt
 * the checksum on the page. Flushing buffers also requires a share-exclusive
 * lock.
 *
 * Due to a lock >= share-exclusive being required to set hint bits, only one
 * backend can set hint bits at a time. Allowing multiple backends to set
 * hint bits concurrently would require more complicated locking: for setting
 * hint bits we'd need to store the count of backends currently setting hint
 * bits, and for I/O we would need another lock level conflicting with the
 * hint-setting level. Given that the share-exclusive lock is only held for a
 * short time, that backends often would just set the same hint bits anyway,
 * and that the cost of occasionally not setting hint bits in hotly accessed
 * pages is fairly low, this seems like an acceptable tradeoff.
 */
bool
BufferBeginSetHintBits(Buffer buffer)
{
	uint64		lockstate;

	/*
	 * Local buffers are backend-private, so nobody else can be writing them
	 * out concurrently.
	 *
	 * NB: Will need to check if there is a write in progress, once it is
	 * possible for writes to be done asynchronously.
	 */
	if (BufferIsLocal(buffer))
		return true;

	return SharedBufferBeginSetHintBits(buffer,
										GetBufferDescriptor(buffer - 1),
										&lockstate);
}
/*
 * End a phase of setting hint bits on this buffer, started with
 * BufferBeginSetHintBits().
 *
 * Strictly speaking this wouldn't be required (the caller could invoke
 * MarkBufferDirtyHint() directly if so desired), but having it lets us
 * perform some sanity checks.
 */
void
BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std)
{
	/* shared buffers must still be locked strongly enough for hint bits */
	if (!BufferIsLocal(buffer))
		Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_SHARE_EXCLUSIVE) ||
			   BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));

	if (mark_dirty)
		MarkBufferDirtyHint(buffer, buffer_std);
}
/*
 * Try to set hint bits on a single 16 bit value in a buffer.
 *
 * If hint bits are allowed to be set, set *ptr = val, try to mark the buffer
 * dirty and return true. Otherwise false is returned.
 *
 * *ptr needs to be a pointer to memory within the buffer.
 *
 * This is a bit faster than BufferBeginSetHintBits() /
 * BufferFinishSetHintBits() when setting hints once in a buffer, but slower
 * than the former when setting hint bits multiple times in the same buffer.
 */
bool
BufferSetHintBits16(uint16 *ptr, uint16 val, Buffer buffer)
{
	BufferDesc *buf_hdr;
	uint64		lockstate;

#ifdef USE_ASSERT_CHECKING
	{
		/* verify that the address is on the page */
		char	   *page = BufferGetPage(buffer);

		Assert((char *) ptr >= page && (char *) ptr < (page + BLCKSZ));
	}
#endif

	/* backend-local buffers need no lock coordination */
	if (BufferIsLocal(buffer))
	{
		*ptr = val;
		MarkLocalBufferDirty(buffer);
		return true;
	}

	buf_hdr = GetBufferDescriptor(buffer - 1);

	if (!SharedBufferBeginSetHintBits(buffer, buf_hdr, &lockstate))
		return false;

	*ptr = val;
	MarkSharedBufferDirtyHint(buffer, buf_hdr, lockstate, true);
	return true;
}
/*
* Functions for buffer I / O handling