@ -472,8 +472,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
ForkNumber forkNum ,
ForkNumber forkNum ,
BlockNumber blockNum ,
BlockNumber blockNum ,
BufferAccessStrategy strategy ,
BufferAccessStrategy strategy ,
bool * foundPtr ) ;
bool * foundPtr , IOContext * io_context ) ;
static void FlushBuffer ( BufferDesc * buf , SMgrRelation reln ) ;
static void FlushBuffer ( BufferDesc * buf , SMgrRelation reln ,
IOObject io_object , IOContext io_context ) ;
static void FindAndDropRelationBuffers ( RelFileLocator rlocator ,
static void FindAndDropRelationBuffers ( RelFileLocator rlocator ,
ForkNumber forkNum ,
ForkNumber forkNum ,
BlockNumber nForkBlock ,
BlockNumber nForkBlock ,
@ -814,6 +815,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BufferDesc * bufHdr ;
BufferDesc * bufHdr ;
Block bufBlock ;
Block bufBlock ;
bool found ;
bool found ;
IOContext io_context ;
IOObject io_object ;
bool isExtend ;
bool isExtend ;
bool isLocalBuf = SmgrIsTemp ( smgr ) ;
bool isLocalBuf = SmgrIsTemp ( smgr ) ;
@ -846,7 +849,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if ( isLocalBuf )
if ( isLocalBuf )
{
{
bufHdr = LocalBufferAlloc ( smgr , forkNum , blockNum , & found ) ;
/*
* LocalBufferAlloc ( ) will set the io_context to IOCONTEXT_NORMAL . We
* do not use a BufferAccessStrategy for I / O of temporary tables .
* However , in some cases , the " strategy " may not be NULL , so we can ' t
* rely on IOContextForStrategy ( ) to set the right IOContext for us .
* This may happen in cases like CREATE TEMPORARY TABLE AS . . .
*/
bufHdr = LocalBufferAlloc ( smgr , forkNum , blockNum , & found , & io_context ) ;
if ( found )
if ( found )
pgBufferUsage . local_blks_hit + + ;
pgBufferUsage . local_blks_hit + + ;
else if ( isExtend )
else if ( isExtend )
@ -862,7 +872,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* not currently in memory .
* not currently in memory .
*/
*/
bufHdr = BufferAlloc ( smgr , relpersistence , forkNum , blockNum ,
bufHdr = BufferAlloc ( smgr , relpersistence , forkNum , blockNum ,
strategy , & found ) ;
strategy , & found , & io_context ) ;
if ( found )
if ( found )
pgBufferUsage . shared_blks_hit + + ;
pgBufferUsage . shared_blks_hit + + ;
else if ( isExtend )
else if ( isExtend )
@ -977,7 +987,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
*/
Assert ( ! ( pg_atomic_read_u32 ( & bufHdr - > state ) & BM_VALID ) ) ; /* spinlock not needed */
Assert ( ! ( pg_atomic_read_u32 ( & bufHdr - > state ) & BM_VALID ) ) ; /* spinlock not needed */
bufBlock = isLocalBuf ? LocalBufHdrGetBlock ( bufHdr ) : BufHdrGetBlock ( bufHdr ) ;
if ( isLocalBuf )
{
bufBlock = LocalBufHdrGetBlock ( bufHdr ) ;
io_object = IOOBJECT_TEMP_RELATION ;
}
else
{
bufBlock = BufHdrGetBlock ( bufHdr ) ;
io_object = IOOBJECT_RELATION ;
}
if ( isExtend )
if ( isExtend )
{
{
@ -986,6 +1005,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
/* don't set checksum for all-zero page */
/* don't set checksum for all-zero page */
smgrextend ( smgr , forkNum , blockNum , ( char * ) bufBlock , false ) ;
smgrextend ( smgr , forkNum , blockNum , ( char * ) bufBlock , false ) ;
pgstat_count_io_op ( io_object , io_context , IOOP_EXTEND ) ;
/*
/*
* NB : we ' re * not * doing a ScheduleBufferTagForWriteback here ;
* NB : we ' re * not * doing a ScheduleBufferTagForWriteback here ;
* although we ' re essentially performing a write . At least on linux
* although we ' re essentially performing a write . At least on linux
@ -1013,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgrread ( smgr , forkNum , blockNum , ( char * ) bufBlock ) ;
smgrread ( smgr , forkNum , blockNum , ( char * ) bufBlock ) ;
pgstat_count_io_op ( io_object , io_context , IOOP_READ ) ;
if ( track_io_timing )
if ( track_io_timing )
{
{
INSTR_TIME_SET_CURRENT ( io_time ) ;
INSTR_TIME_SET_CURRENT ( io_time ) ;
@ -1106,14 +1129,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* * foundPtr is actually redundant with the buffer ' s BM_VALID flag , but
* * foundPtr is actually redundant with the buffer ' s BM_VALID flag , but
* we keep it for simplicity in ReadBuffer .
* we keep it for simplicity in ReadBuffer .
*
*
* io_context is passed as an output parameter to avoid calling
* IOContextForStrategy ( ) when there is a shared buffers hit and no IO
* statistics need be captured .
*
* No locks are held either at entry or exit .
* No locks are held either at entry or exit .
*/
*/
static BufferDesc *
static BufferDesc *
BufferAlloc ( SMgrRelation smgr , char relpersistence , ForkNumber forkNum ,
BufferAlloc ( SMgrRelation smgr , char relpersistence , ForkNumber forkNum ,
BlockNumber blockNum ,
BlockNumber blockNum ,
BufferAccessStrategy strategy ,
BufferAccessStrategy strategy ,
bool * foundPtr )
bool * foundPtr , IOContext * io_context )
{
{
bool from_ring ;
BufferTag newTag ; /* identity of requested block */
BufferTag newTag ; /* identity of requested block */
uint32 newHash ; /* hash value for newTag */
uint32 newHash ; /* hash value for newTag */
LWLock * newPartitionLock ; /* buffer partition lock for it */
LWLock * newPartitionLock ; /* buffer partition lock for it */
@ -1165,8 +1193,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
{
/*
/*
* If we get here , previous attempts to read the buffer must
* If we get here , previous attempts to read the buffer must
* have failed . . . but we shall bravely try again .
* have failed . . . but we shall bravely try again . Set
* io_context since we will in fact need to count an IO
* operation .
*/
*/
* io_context = IOContextForStrategy ( strategy ) ;
* foundPtr = false ;
* foundPtr = false ;
}
}
}
}
@ -1180,6 +1211,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
*/
LWLockRelease ( newPartitionLock ) ;
LWLockRelease ( newPartitionLock ) ;
* io_context = IOContextForStrategy ( strategy ) ;
/* Loop here in case we have to try another victim buffer */
/* Loop here in case we have to try another victim buffer */
for ( ; ; )
for ( ; ; )
{
{
@ -1193,7 +1226,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* Select a victim buffer . The buffer is returned with its header
* Select a victim buffer . The buffer is returned with its header
* spinlock still held !
* spinlock still held !
*/
*/
buf = StrategyGetBuffer ( strategy , & buf_state ) ;
buf = StrategyGetBuffer ( strategy , & buf_state , & from_ring ) ;
Assert ( BUF_STATE_GET_REFCOUNT ( buf_state ) = = 0 ) ;
Assert ( BUF_STATE_GET_REFCOUNT ( buf_state ) = = 0 ) ;
@ -1247,7 +1280,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
UnlockBufHdr ( buf , buf_state ) ;
UnlockBufHdr ( buf , buf_state ) ;
if ( XLogNeedsFlush ( lsn ) & &
if ( XLogNeedsFlush ( lsn ) & &
StrategyRejectBuffer ( strategy , buf ) )
StrategyRejectBuffer ( strategy , buf , from_ring ) )
{
{
/* Drop lock/pin and loop around for another buffer */
/* Drop lock/pin and loop around for another buffer */
LWLockRelease ( BufferDescriptorGetContentLock ( buf ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( buf ) ) ;
@ -1262,7 +1295,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr - > smgr_rlocator . locator . dbOid ,
smgr - > smgr_rlocator . locator . dbOid ,
smgr - > smgr_rlocator . locator . relNumber ) ;
smgr - > smgr_rlocator . locator . relNumber ) ;
FlushBuffer ( buf , NULL ) ;
FlushBuffer ( buf , NULL , IOOBJECT_RELATION , * io_context ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( buf ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( buf ) ) ;
ScheduleBufferTagForWriteback ( & BackendWritebackContext ,
ScheduleBufferTagForWriteback ( & BackendWritebackContext ,
@ -1443,6 +1476,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
LWLockRelease ( newPartitionLock ) ;
LWLockRelease ( newPartitionLock ) ;
if ( oldFlags & BM_VALID )
{
/*
* When a BufferAccessStrategy is in use , blocks evicted from shared
* buffers are counted as IOOP_EVICT in the corresponding context
* ( e . g . IOCONTEXT_BULKWRITE ) . Shared buffers are evicted by a
* strategy in two cases : 1 ) while initially claiming buffers for the
* strategy ring , and 2 ) to replace an existing strategy ring buffer
* because it is pinned or in use and cannot be reused .
*
* Blocks evicted from buffers already in the strategy ring are
* counted as IOOP_REUSE in the corresponding strategy context .
*
* At this point , we can accurately count evictions and reuses ,
* because we have successfully claimed the valid buffer . Counting
* any earlier would be inaccurate , since we could still have been
* forced to release the buffer due to concurrent pinners or an error .
*/
pgstat_count_io_op ( IOOBJECT_RELATION , * io_context ,
from_ring ? IOOP_REUSE : IOOP_EVICT ) ;
}
/*
/*
* Buffer contents are currently invalid . Try to obtain the right to
* Buffer contents are currently invalid . Try to obtain the right to
* start I / O . If StartBufferIO returns false , then someone else managed
* start I / O . If StartBufferIO returns false , then someone else managed
@ -2563,7 +2618,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
PinBuffer_Locked ( bufHdr ) ;
PinBuffer_Locked ( bufHdr ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
FlushBuffer ( bufHdr , NULL ) ;
FlushBuffer ( bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
@ -2813,7 +2868,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
* as the second parameter . If not , pass NULL .
* as the second parameter . If not , pass NULL .
*/
*/
static void
static void
FlushBuffer ( BufferDesc * buf , SMgrRelation reln )
FlushBuffer ( BufferDesc * buf , SMgrRelation reln , IOObject io_object ,
IOContext io_context )
{
{
XLogRecPtr recptr ;
XLogRecPtr recptr ;
ErrorContextCallback errcallback ;
ErrorContextCallback errcallback ;
@ -2907,6 +2963,26 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
bufToWrite ,
bufToWrite ,
false ) ;
false ) ;
/*
* When a strategy is in use , only flushes of dirty buffers already in the
* strategy ring are counted as strategy writes ( IOCONTEXT
* [ BULKREAD | BULKWRITE | VACUUM ] IOOP_WRITE ) for the purpose of IO
* statistics tracking .
*
* If a shared buffer initially added to the ring must be flushed before
* being used , this is counted as an IOCONTEXT_NORMAL IOOP_WRITE .
*
* If a shared buffer which was added to the ring later because the
* current strategy buffer is pinned or in use or because all strategy
* buffers were dirty and rejected ( for BAS_BULKREAD operations only )
* requires flushing , this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
* ( from_ring , which is tracked by the caller BufferAlloc ( ) , will be
* false ) .
*
* When a strategy is not in use , the write can only be a " regular " write
* of a dirty shared buffer ( IOCONTEXT_NORMAL IOOP_WRITE ) .
*/
pgstat_count_io_op ( IOOBJECT_RELATION , io_context , IOOP_WRITE ) ;
if ( track_io_timing )
if ( track_io_timing )
{
{
INSTR_TIME_SET_CURRENT ( io_time ) ;
INSTR_TIME_SET_CURRENT ( io_time ) ;
@ -3549,6 +3625,8 @@ FlushRelationBuffers(Relation rel)
buf_state & = ~ ( BM_DIRTY | BM_JUST_DIRTIED ) ;
buf_state & = ~ ( BM_DIRTY | BM_JUST_DIRTIED ) ;
pg_atomic_unlocked_write_u32 ( & bufHdr - > state , buf_state ) ;
pg_atomic_unlocked_write_u32 ( & bufHdr - > state , buf_state ) ;
pgstat_count_io_op ( IOOBJECT_TEMP_RELATION , IOCONTEXT_NORMAL , IOOP_WRITE ) ;
/* Pop the error context stack */
/* Pop the error context stack */
error_context_stack = errcallback . previous ;
error_context_stack = errcallback . previous ;
}
}
@ -3581,7 +3659,7 @@ FlushRelationBuffers(Relation rel)
{
{
PinBuffer_Locked ( bufHdr ) ;
PinBuffer_Locked ( bufHdr ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
FlushBuffer ( bufHdr , RelationGetSmgr ( rel ) ) ;
FlushBuffer ( bufHdr , RelationGetSmgr ( rel ) , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
UnpinBuffer ( bufHdr ) ;
UnpinBuffer ( bufHdr ) ;
}
}
@ -3679,7 +3757,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
{
{
PinBuffer_Locked ( bufHdr ) ;
PinBuffer_Locked ( bufHdr ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
FlushBuffer ( bufHdr , srelent - > srel ) ;
FlushBuffer ( bufHdr , srelent - > srel , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
UnpinBuffer ( bufHdr ) ;
UnpinBuffer ( bufHdr ) ;
}
}
@ -3889,7 +3967,7 @@ FlushDatabaseBuffers(Oid dbid)
{
{
PinBuffer_Locked ( bufHdr ) ;
PinBuffer_Locked ( bufHdr ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
LWLockAcquire ( BufferDescriptorGetContentLock ( bufHdr ) , LW_SHARED ) ;
FlushBuffer ( bufHdr , NULL ) ;
FlushBuffer ( bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
LWLockRelease ( BufferDescriptorGetContentLock ( bufHdr ) ) ;
UnpinBuffer ( bufHdr ) ;
UnpinBuffer ( bufHdr ) ;
}
}
@ -3916,7 +3994,7 @@ FlushOneBuffer(Buffer buffer)
Assert ( LWLockHeldByMe ( BufferDescriptorGetContentLock ( bufHdr ) ) ) ;
Assert ( LWLockHeldByMe ( BufferDescriptorGetContentLock ( bufHdr ) ) ) ;
FlushBuffer ( bufHdr , NULL ) ;
FlushBuffer ( bufHdr , NULL , IOOBJECT_RELATION , IOCONTEXT_NORMAL ) ;
}
}
/*
/*