@ -65,6 +65,15 @@
#define DROP_RELS_BSEARCH_THRESHOLD		20

/*
 * Per-buffer entry of this backend's private pin bookkeeping: how many
 * times this backend currently has the given shared buffer pinned.
 */
typedef struct PrivateRefCountEntry
{
	Buffer		buffer;			/* shared buffer id; InvalidBuffer if slot unused */
	int32		refcount;		/* # of pins this backend holds on the buffer */
} PrivateRefCountEntry;

/* 64 bytes, about the size of a cache line on common systems */
#define REFCOUNT_ARRAY_ENTRIES 8

/* GUC variables */
bool		zero_damaged_pages = false;
int			bgwriter_lru_maxpages = 100;
@ -85,6 +94,281 @@ static bool IsForInput;
/* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL;

/*
 * Backend-Private refcount management:
 *
 * Each buffer also has a private refcount that keeps track of the number of
 * times the buffer is pinned in the current process.  This is so that the
 * shared refcount needs to be modified only once if a buffer is pinned more
 * than once by an individual backend.  It's also used to check that no
 * buffers are still pinned at the end of transactions and when exiting.
 *
 * To avoid - as we used to - requiring an array with NBuffers entries to keep
 * track of local buffers, we use a small sequentially searched array
 * (PrivateRefCountArray) and an overflow hash table (PrivateRefCountHash) to
 * keep track of backend local pins.
 *
 * Until no more than REFCOUNT_ARRAY_ENTRIES buffers are pinned at once, all
 * refcounts are kept track of in the array; after that, new array entries
 * displace old ones into the hash table.  That way a frequently used entry
 * can't get "stuck" in the hashtable while infrequent ones clog the array.
 *
 * Note that in most scenarios the number of pinned buffers will not exceed
 * REFCOUNT_ARRAY_ENTRIES.
 */
static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES];
static HTAB *PrivateRefCountHash = NULL;
/* # of entries that have spilled out of the array into PrivateRefCountHash */
static int32 PrivateRefCountOverflowed = 0;
/* clock hand choosing which array slot to evict into the hash table */
static uint32 PrivateRefCountClock = 0;

static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer,
						bool create, bool do_move);
static inline int32 GetPrivateRefCount(Buffer buffer);
static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref);
/*
 * Return the PrivateRefCount entry for the passed buffer.
 *
 * Returns NULL if create = false is passed and the buffer doesn't have a
 * PrivateRefCount entry; allocates a new PrivateRefCountEntry if currently
 * none exists and create = true is passed.
 *
 * If do_move is true, a pre-existing entry found in the hash table is moved
 * into the array, optimizing it for frequent access.
 *
 * NOTE(review): an earlier version of this comment said do_move is "only
 * allowed for create = false", but the Assert below enforces the opposite
 * direction (create = true requires do_move = true) -- confirm intent.
 *
 * When a returned refcount entry isn't used anymore it has to be forgotten,
 * using ForgetPrivateRefCountEntry().
 *
 * Only works for shared buffers.
 */
static PrivateRefCountEntry *
GetPrivateRefCountEntry(Buffer buffer, bool create, bool do_move)
{
	PrivateRefCountEntry *res;
	PrivateRefCountEntry *free = NULL;
	bool		found = false;
	int			i;

	/* create implies do_move: a newly created entry always lives in the array */
	Assert(!create || do_move);
	Assert(BufferIsValid(buffer));
	Assert(!BufferIsLocal(buffer));

	/*
	 * First search for references in the array, that'll be sufficient in the
	 * majority of cases.
	 */
	for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++)
	{
		res = &PrivateRefCountArray[i];

		if (res->buffer == buffer)
			return res;

		/* Remember where to put a new refcount, should it become necessary. */
		if (free == NULL && res->buffer == InvalidBuffer)
			free = res;
	}

	/*
	 * By here we know that the buffer, if already pinned, isn't residing in
	 * the array.
	 */
	res = NULL;
	found = false;

	/*
	 * Look up the buffer in the hashtable if we've previously overflowed into
	 * it.
	 */
	if (PrivateRefCountOverflowed > 0)
	{
		res = hash_search(PrivateRefCountHash,
						  (void *) &buffer,
						  HASH_FIND,
						  &found);
	}

	if (!found)
	{
		if (!create)
		{
			/* Neither array nor hash have an entry and no new entry is needed */
			return NULL;
		}
		else if (free != NULL)
		{
			/* add entry into the free array slot */
			free->buffer = buffer;
			free->refcount = 0;
			return free;
		}
		else
		{
			/*
			 * Move entry from the current clock position in the array into
			 * the hashtable. Use that slot.
			 */
			PrivateRefCountEntry *arrayent;
			PrivateRefCountEntry *hashent;

			/* select victim slot */
			arrayent = &PrivateRefCountArray[
				PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
			Assert(arrayent->buffer != InvalidBuffer);

			/* enter victim array entry into hashtable */
			hashent = hash_search(PrivateRefCountHash,
								  (void *) &arrayent->buffer,
								  HASH_ENTER,
								  &found);
			Assert(!found);
			hashent->refcount = arrayent->refcount;

			/* fill the now free array slot */
			arrayent->buffer = buffer;
			arrayent->refcount = 0;
			PrivateRefCountOverflowed++;

			return arrayent;
		}
	}
	else
	{
		if (!do_move)
		{
			return res;
		}
		else if (found && free != NULL)
		{
			/* move buffer from hashtable into the free array slot */

			/* fill array slot */
			free->buffer = buffer;
			free->refcount = res->refcount;

			/* delete from hashtable */
			hash_search(PrivateRefCountHash,
						(void *) &buffer,
						HASH_REMOVE,
						&found);
			Assert(found);
			Assert(PrivateRefCountOverflowed > 0);
			PrivateRefCountOverflowed--;

			return free;
		}
		else
		{
			/*
			 * Swap the entry in the hash table with the one in the array at
			 * the current clock position.
			 */
			PrivateRefCountEntry *arrayent;
			PrivateRefCountEntry *hashent;

			/* select victim slot */
			arrayent = &PrivateRefCountArray[
				PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES];
			Assert(arrayent->buffer != InvalidBuffer);

			/* enter victim entry into the hashtable */
			hashent = hash_search(PrivateRefCountHash,
								  (void *) &arrayent->buffer,
								  HASH_ENTER,
								  &found);
			Assert(!found);
			hashent->refcount = arrayent->refcount;

			/* fill now free array entry with previously searched entry */
			arrayent->buffer = res->buffer;
			arrayent->refcount = res->refcount;

			/*
			 * And remove the old entry.  Note the HASH_REMOVE key is
			 * arrayent->buffer, which at this point equals the passed-in
			 * buffer, i.e. we delete the hash entry that just moved into
			 * the array -- not the victim entry entered above.
			 */
			hash_search(PrivateRefCountHash,
						(void *) &arrayent->buffer,
						HASH_REMOVE,
						&found);
			Assert(found);

			/* PrivateRefCountOverflowed stays the same: -1 + +1 = 0 */
			return arrayent;
		}
	}

	Assert(false);				/* unreachable */
	return NULL;
}
/*
 * Returns how many times the passed buffer is pinned by this backend.
 *
 * Returns 0 if this backend holds no pin on the buffer.
 *
 * Only works for shared memory buffers!
 */
static inline int32
GetPrivateRefCount(Buffer buffer)
{
	PrivateRefCountEntry *ref;

	Assert(BufferIsValid(buffer));
	Assert(!BufferIsLocal(buffer));

	/* pure lookup: neither create a new entry nor move an existing one */
	ref = GetPrivateRefCountEntry(buffer, false, false);

	if (ref == NULL)
		return 0;
	return ref->refcount;
}
/*
 * Release resources used to track the reference count of a buffer which we no
 * longer have pinned and don't want to pin again immediately.
 *
 * The entry's refcount must already have dropped to zero.
 */
static void
ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
{
	Assert(ref->refcount == 0);

	if (ref >= &PrivateRefCountArray[0] &&
		ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES])
	{
		/* entry lives in the array: just mark the slot as free */
		ref->buffer = InvalidBuffer;
	}
	else
	{
		/* entry lives in the overflow hashtable: remove it */
		bool		found;
		Buffer		buffer = ref->buffer;

		hash_search(PrivateRefCountHash,
					(void *) &buffer,
					HASH_REMOVE,
					&found);
		Assert(found);
		Assert(PrivateRefCountOverflowed > 0);
		PrivateRefCountOverflowed--;
	}
}
/*
 * BufferIsPinned
 *		True iff the buffer is pinned (also checks for valid buffer number).
 *
 *		NOTE: what we check here is that *this* backend holds a pin on
 *		the buffer.  We do not care whether some other backend does.
 */
#define BufferIsPinned(bufnum) \
( \
	!BufferIsValid(bufnum) ? \
		false \
	: \
		BufferIsLocal(bufnum) ? \
			(LocalRefCount[-(bufnum) - 1] > 0) \
		: \
			(GetPrivateRefCount(bufnum) > 0) \
)
static Buffer ReadBuffer_common ( SMgrRelation reln , char relpersistence ,
ForkNumber forkNum , BlockNumber blockNum ,
@ -940,7 +1224,7 @@ retry:
UnlockBufHdr ( buf ) ;
LWLockRelease ( oldPartitionLock ) ;
/* safety check: should definitely not be our *own* pin */
if ( PrivateRefCount [ buf - > buf_id ] ! = 0 )
if ( GetPrivateRefCount ( buf - > buf_id ) > 0 )
elog ( ERROR , " buffer is pinned in InvalidateBuffer " ) ;
WaitIO ( buf ) ;
goto retry ;
@ -999,7 +1283,7 @@ MarkBufferDirty(Buffer buffer)
bufHdr = & BufferDescriptors [ buffer - 1 ] ;
Assert ( PrivateRefCount [ buffer - 1 ] > 0 ) ;
Assert ( BufferIsPinned ( buffer ) ) ;
/* unfortunately we can't check if the lock is held exclusively */
Assert ( LWLockHeldByMe ( bufHdr - > content_lock ) ) ;
@ -1046,9 +1330,9 @@ ReleaseAndReadBuffer(Buffer buffer,
if ( BufferIsValid ( buffer ) )
{
Assert ( BufferIsPinned ( buffer ) ) ;
if ( BufferIsLocal ( buffer ) )
{
Assert ( LocalRefCount [ - buffer - 1 ] > 0 ) ;
bufHdr = & LocalBufferDescriptors [ - buffer - 1 ] ;
if ( bufHdr - > tag . blockNum = = blockNum & &
RelFileNodeEquals ( bufHdr - > tag . rnode , relation - > rd_node ) & &
@ -1059,7 +1343,6 @@ ReleaseAndReadBuffer(Buffer buffer,
}
else
{
Assert ( PrivateRefCount [ buffer - 1 ] > 0 ) ;
bufHdr = & BufferDescriptors [ buffer - 1 ] ;
/* we have pin, so it's ok to examine tag without spinlock */
if ( bufHdr - > tag . blockNum = = blockNum & &
@ -1096,8 +1379,11 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
{
int b = buf - > buf_id ;
bool result ;
PrivateRefCountEntry * ref ;
if ( PrivateRefCount [ b ] = = 0 )
ref = GetPrivateRefCountEntry ( b + 1 , true , true ) ;
if ( ref - > refcount = = 0 )
{
LockBufHdr ( buf ) ;
buf - > refcount + + ;
@ -1119,8 +1405,9 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
/* If we previously pinned the buffer, it must surely be valid */
result = true ;
}
PrivateRefCount [ b ] + + ;
Assert ( PrivateRefCount [ b ] > 0 ) ;
ref - > refcount + + ;
Assert ( ref - > refcount > 0 ) ;
ResourceOwnerRememberBuffer ( CurrentResourceOwner ,
BufferDescriptorGetBuffer ( buf ) ) ;
return result ;
@ -1143,12 +1430,15 @@ static void
PinBuffer_Locked ( volatile BufferDesc * buf )
{
int b = buf - > buf_id ;
PrivateRefCountEntry * ref ;
ref = GetPrivateRefCountEntry ( b + 1 , true , true ) ;
if ( PrivateRefCount [ b ] = = 0 )
if ( ref - > refcount = = 0 )
buf - > refcount + + ;
UnlockBufHdr ( buf ) ;
PrivateRefCount [ b ] + + ;
Assert ( PrivateRefCount [ b ] > 0 ) ;
ref - > refcount + + ;
Assert ( ref - > refcount > 0 ) ;
ResourceOwnerRememberBuffer ( CurrentResourceOwner ,
BufferDescriptorGetBuffer ( buf ) ) ;
}
@ -1164,15 +1454,19 @@ PinBuffer_Locked(volatile BufferDesc *buf)
static void
UnpinBuffer ( volatile BufferDesc * buf , bool fixOwner )
{
PrivateRefCountEntry * ref ;
int b = buf - > buf_id ;
ref = GetPrivateRefCountEntry ( b + 1 , false , false ) ;
Assert ( ref ! = NULL ) ;
if ( fixOwner )
ResourceOwnerForgetBuffer ( CurrentResourceOwner ,
BufferDescriptorGetBuffer ( buf ) ) ;
Assert ( PrivateRefCount [ b ] > 0 ) ;
PrivateRefCount [ b ] - - ;
if ( PrivateRefCount [ b ] = = 0 )
Assert ( ref - > refcount > 0 ) ;
ref - > refcount - - ;
if ( ref - > refcount = = 0 )
{
/* I'd better not still hold any locks on the buffer */
Assert ( ! LWLockHeldByMe ( buf - > content_lock ) ) ;
@ -1197,6 +1491,8 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
}
else
UnlockBufHdr ( buf ) ;
ForgetPrivateRefCountEntry ( ref ) ;
}
}
@ -1702,6 +1998,10 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
/*
* AtEOXact_Buffers - clean up at end of transaction .
*
* As of PostgreSQL 8.0 , buffer pins should get released by the
* ResourceOwner mechanism . This routine is just a debugging
* cross - check that no pins remain .
*/
void
AtEOXact_Buffers ( bool isCommit )
@ -1709,6 +2009,36 @@ AtEOXact_Buffers(bool isCommit)
CheckForBufferLeaks ( ) ;
AtEOXact_LocalBuffers ( isCommit ) ;
Assert ( PrivateRefCountOverflowed = = 0 ) ;
}
/*
* Initialize access to shared buffer pool
*
* This is called during backend startup ( whether standalone or under the
* postmaster ) . It sets up for this backend ' s access to the already - existing
* buffer pool .
*
* NB : this is called before InitProcess ( ) , so we do not have a PGPROC and
* cannot do LWLockAcquire ; hence we can ' t actually access stuff in
* shared memory yet . We are only initializing local data here .
* ( See also InitBufferPoolBackend )
*/
void
InitBufferPoolAccess ( void )
{
HASHCTL hash_ctl ;
memset ( & PrivateRefCountArray , 0 , sizeof ( PrivateRefCountArray ) ) ;
MemSet ( & hash_ctl , 0 , sizeof ( hash_ctl ) ) ;
hash_ctl . keysize = sizeof ( int32 ) ;
hash_ctl . entrysize = sizeof ( PrivateRefCountArray ) ;
hash_ctl . hash = oid_hash ; /* a bit more efficient than tag_hash */
PrivateRefCountHash = hash_create ( " PrivateRefCount " , 100 , & hash_ctl ,
HASH_ELEM | HASH_FUNCTION ) ;
}
/*
@ -1754,16 +2084,34 @@ CheckForBufferLeaks(void)
{
# ifdef USE_ASSERT_CHECKING
int RefCountErrors = 0 ;
Buffer b ;
PrivateRefCountEntry * res ;
int i ;
/* check the array */
for ( i = 0 ; i < REFCOUNT_ARRAY_ENTRIES ; i + + )
{
res = & PrivateRefCountArray [ i ] ;
if ( res - > buffer ! = InvalidBuffer )
{
PrintBufferLeakWarning ( res - > buffer ) ;
RefCountErrors + + ;
}
}
for ( b = 1 ; b < = NBuffers ; b + + )
/* if necessary, search the hash */
if ( PrivateRefCountOverflowed )
{
if ( PrivateRefCount [ b - 1 ] ! = 0 )
HASH_SEQ_STATUS hstat ;
hash_seq_init ( & hstat , PrivateRefCountHash ) ;
while ( ( res = ( PrivateRefCountEntry * ) hash_seq_search ( & hstat ) ) ! = NULL )
{
PrintBufferLeakWarning ( b ) ;
PrintBufferLeakWarning ( res - > buffer ) ;
RefCountErrors + + ;
}
}
Assert ( RefCountErrors = = 0 ) ;
# endif
}
@ -1789,7 +2137,7 @@ PrintBufferLeakWarning(Buffer buffer)
else
{
buf = & BufferDescriptors [ buffer - 1 ] ;
loccount = PrivateRefCount [ buffer - 1 ] ;
loccount = GetPrivateRefCount ( buffer ) ;
backend = InvalidBackendId ;
}
@ -2329,7 +2677,7 @@ PrintBufferDescs(void)
i , buf - > freeNext ,
relpathbackend ( buf - > tag . rnode , InvalidBackendId , buf - > tag . forkNum ) ,
buf - > tag . blockNum , buf - > flags ,
buf - > refcount , PrivateRefCount [ i ] ) ;
buf - > refcount , GetPrivateRefCount ( i ) ) ;
}
}
# endif
@ -2343,7 +2691,7 @@ PrintPinnedBufs(void)
for ( i = 0 ; i < NBuffers ; + + i , + + buf )
{
if ( PrivateRefCount [ i ] > 0 )
if ( GetPrivateRefCount ( i + 1 ) > 0 )
{
/* theoretically we should lock the bufhdr here */
elog ( LOG ,
@ -2352,7 +2700,7 @@ PrintPinnedBufs(void)
i , buf - > freeNext ,
relpath ( buf - > tag . rnode , buf - > tag . forkNum ) ,
buf - > tag . blockNum , buf - > flags ,
buf - > refcount , PrivateRefCount [ i ] ) ;
buf - > refcount , GetPrivateRefCount ( i + 1 ) ) ;
}
}
}
@ -2509,6 +2857,7 @@ void
ReleaseBuffer ( Buffer buffer )
{
volatile BufferDesc * bufHdr ;
PrivateRefCountEntry * ref ;
if ( ! BufferIsValid ( buffer ) )
elog ( ERROR , " bad buffer ID: %d " , buffer ) ;
@ -2524,10 +2873,12 @@ ReleaseBuffer(Buffer buffer)
bufHdr = & BufferDescriptors [ buffer - 1 ] ;
Assert ( PrivateRefCount [ buffer - 1 ] > 0 ) ;
ref = GetPrivateRefCountEntry ( buffer , false , false ) ;
Assert ( ref ! = NULL ) ;
Assert ( ref - > refcount > 0 ) ;
if ( PrivateRefCount [ buffer - 1 ] > 1 )
PrivateRefCount [ buffer - 1 ] - - ;
if ( ref - > refcount > 1 )
ref - > refcount - - ;
else
UnpinBuffer ( bufHdr , false ) ;
}
@ -2561,7 +2912,12 @@ IncrBufferRefCount(Buffer buffer)
if ( BufferIsLocal ( buffer ) )
LocalRefCount [ - buffer - 1 ] + + ;
else
PrivateRefCount [ buffer - 1 ] + + ;
{
PrivateRefCountEntry * ref ;
ref = GetPrivateRefCountEntry ( buffer , false , true ) ;
Assert ( ref ! = NULL ) ;
ref - > refcount + + ;
}
}
/*
@ -2595,7 +2951,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
bufHdr = & BufferDescriptors [ buffer - 1 ] ;
Assert ( PrivateRefCount [ buffer - 1 ] > 0 ) ;
Assert ( GetPrivateRefCount ( buffer ) > 0 ) ;
/* here, either share or exclusive lock is OK */
Assert ( LWLockHeldByMe ( bufHdr - > content_lock ) ) ;
@ -2813,9 +3169,9 @@ LockBufferForCleanup(Buffer buffer)
}
/* There should be exactly one local pin */
if ( PrivateRefCount [ buffer - 1 ] ! = 1 )
if ( GetPrivateRefCount ( buffer ) ! = 1 )
elog ( ERROR , " incorrect local pin count: %d " ,
PrivateRefCount [ buffer - 1 ] ) ;
GetPrivateRefCount ( buffer ) ) ;
bufHdr = & BufferDescriptors [ buffer - 1 ] ;
@ -2880,7 +3236,7 @@ HoldingBufferPinThatDelaysRecovery(void)
if ( bufid < 0 )
return false ;
if ( PrivateRefCount [ bufid ] > 0 )
if ( GetPrivateRefCount ( bufid + 1 ) > 0 )
return true ;
return false ;
@ -2910,8 +3266,8 @@ ConditionalLockBufferForCleanup(Buffer buffer)
}
/* There should be exactly one local pin */
Assert ( PrivateRefCount [ buffer - 1 ] > 0 ) ;
if ( PrivateRefCount [ buffer - 1 ] ! = 1 )
Assert ( GetPrivateRefCount ( buffer ) > 0 ) ;
if ( GetPrivateRefCount ( buffer ) ! = 1 )
return false ;
/* Try to acquire lock */