@ -3,49 +3,6 @@
* slru . c
* Simple LRU buffering for transaction status logfiles
*
* Portions Copyright ( c ) 1996 - 2003 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
* $ PostgreSQL : pgsql / src / backend / access / transam / slru . c , v 1.18 2004 / 07 / 21 22 : 31 : 20 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# include "postgres.h"
# include <fcntl.h>
# include <sys/stat.h>
# include <unistd.h>
# include "access/clog.h"
# include "access/slru.h"
# include "access/subtrans.h"
# include "postmaster/bgwriter.h"
# include "storage/fd.h"
# include "storage/lwlock.h"
# include "storage/shmem.h"
# include "miscadmin.h"
/*
* Define segment size . A page is the same BLCKSZ as is used everywhere
* else in Postgres . The segment size can be chosen somewhat arbitrarily ;
* we make it 32 pages by default , or 256 kB , i . e . 1 M transactions for CLOG
* or 64 K transactions for SUBTRANS .
*
* Note : because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF ,
* page numbering also wraps around at 0xFFFFFFFF / xxxx_XACTS_PER_PAGE ( where
* xxxx is CLOG or SUBTRANS , respectively ) , and segment numbering at
* 0xFFFFFFFF / xxxx_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT . We need
* take no explicit notice of that fact in this module , except when comparing
* segment and page numbers in SimpleLruTruncate ( see PagePrecedes ( ) ) .
*/
# define SLRU_PAGES_PER_SEGMENT 32
/*----------
* Shared - memory data structures for SLRU control
*
* We use a simple least - recently - used scheme to manage a pool of page
* buffers . Under ordinary circumstances we expect that write
* traffic will occur mostly to the latest page ( and to the just - prior
@ -86,44 +43,46 @@
* to re - dirty a page that is currently being written out . This is handled
* by setting the page ' s state from WRITE_IN_PROGRESS to DIRTY . The writing
* process must notice this and not mark the page CLEAN when it ' s done .
* - - - - - - - - - -
*
*
* Portions Copyright ( c ) 1996 - 2003 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
* $ PostgreSQL : pgsql / src / backend / access / transam / slru . c , v 1.19 2004 / 08 / 23 23 : 22 : 44 tgl Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# include "postgres.h"
/*
 * State of a single SLRU buffer slot.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the enumerators receive from implicit numbering.
 */
typedef enum
{
	SLRU_PAGE_EMPTY = 0,				/* slot holds no page */
	SLRU_PAGE_READ_IN_PROGRESS = 1,		/* page image is being read in */
	SLRU_PAGE_CLEAN = 2,				/* valid page, matches disk */
	SLRU_PAGE_DIRTY = 3,				/* valid page, must be written */
	SLRU_PAGE_WRITE_IN_PROGRESS = 4		/* page image is being written out */
} SlruPageStatus;
# include <fcntl.h>
# include <sys/stat.h>
# include <unistd.h>
/*
* Shared - memory state
*/
/*
 * Control block for one SLRU buffer pool.
 *
 * SimpleLruInit zeroes this struct and then lays out the page buffers
 * immediately after it (one BLCKSZ-sized buffer per slot).  All per-slot
 * arrays are indexed by slot number.
 */
typedef struct SlruSharedData
{
/*
 * Lock covering the slot bookkeeping below.  The read, write, flush and
 * truncate paths in this file take it in LW_EXCLUSIVE mode, and drop it
 * while a physical read or write is in flight.
 */
LWLockId ControlLock ;
/*
 * NOTE(review): the #include lines below sit inside the struct body.  They
 * look like lines from a different revision of the file that ended up
 * interleaved here -- confirm before relying on this layout.
 */
# include "access/slru.h"
# include "access/xlog.h"
# include "storage/fd.h"
# include "storage/shmem.h"
# include "miscadmin.h"
/*
 * Info for each buffer slot.  Page number is undefined when status is
 * EMPTY.  lru_count is essentially the number of page switches since
 * last use of this page; the page with highest lru_count is the best
 * candidate to replace.
 */
char * page_buffer [ NUM_CLOG_BUFFERS ] ;
SlruPageStatus page_status [ NUM_CLOG_BUFFERS ] ;
int page_number [ NUM_CLOG_BUFFERS ] ;
unsigned int page_lru_count [ NUM_CLOG_BUFFERS ] ;
/*
 * Per-slot lock, taken in LW_EXCLUSIVE mode around the physical read or
 * write of that slot after ControlLock has been released.
 */
LWLockId BufferLocks [ NUM_CLOG_BUFFERS ] ; /* Per-buffer I/O locks */
/*
 * latest_page_number is the page number of the current end of the
 * CLOG; this is not critical data, since we use it only to avoid
 * swapping out the latest page.
 */
int latest_page_number ;
} SlruSharedData ;
/*
* Define segment size . A page is the same BLCKSZ as is used everywhere
* else in Postgres . The segment size can be chosen somewhat arbitrarily ;
* we make it 32 pages by default , or 256 kB , i . e . 1 M transactions for CLOG
* or 64 K transactions for SUBTRANS .
*
* Note : because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF ,
* page numbering also wraps around at 0xFFFFFFFF / xxxx_XACTS_PER_PAGE ( where
* xxxx is CLOG or SUBTRANS , respectively ) , and segment numbering at
* 0xFFFFFFFF / xxxx_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT . We need
* take no explicit notice of that fact in this module , except when comparing
* segment and page numbers in SimpleLruTruncate ( see PagePrecedes ( ) ) .
*
* Note : this file currently assumes that segment file names will be four
* hex digits . This sets a lower bound on the segment size ( 64 K transactions
* for 32 - bit TransactionIds ) .
*/
# define SLRU_PAGES_PER_SEGMENT 32
/*
 * SlruFileName(ctl, path, seg)
 *
 * Format the name of segment file number "seg" into "path": the Dir field
 * of "ctl", a slash, then "seg" as at least four upper-case hex digits.
 * The write is bounded by MAXPGPATH, so "path" must be a buffer of at
 * least that many bytes.  The expansion is a single snprintf() call
 * expression, so each argument is evaluated once and the snprintf() result
 * is the value of the macro.
 */
# define SlruFileName(ctl, path, seg) \
snprintf ( path , MAXPGPATH , " %s/%04X " , ( ctl ) - > Dir , seg )
@ -138,8 +97,8 @@ typedef struct SlruSharedData
/*
 * Bookkeeping for segment files that are kept open across a multi-page
 * flush.  SimpleLruFlush sets num_files to zero, hands the struct to
 * SimpleLruWritePage for each buffer slot, and afterwards walks
 * fd[0 .. num_files-1] to fsync the files.
 *
 * NOTE(review): fd and segno are each declared twice below, and the array
 * bound identifiers are split by a space.  This looks like the old and new
 * revisions of the same two lines interleaved -- confirm which pair is
 * intended.
 */
typedef struct SlruFlushData
{
int num_files ; /* number of entries in use in fd[] and segno[] */
int fd [ NUM_CLOG _BUFFERS ] ; /* file descriptors of the open segment files */
int segno [ NUM_CLOG _BUFFERS ] ; /* segment numbers of those files (clog) */
int fd [ NUM_SLRU _BUFFERS ] ; /* file descriptors of the open segment files */
int segno [ NUM_SLRU _BUFFERS ] ; /* segment numbers of those files */
} SlruFlushData ;
/*
@ -149,7 +108,7 @@ typedef struct SlruFlushData
do { \
if ( ( shared ) - > page_lru_count [ slotno ] ! = 0 ) { \
int iilru ; \
for ( iilru = 0 ; iilru < NUM_CLOG _BUFFERS ; iilru + + ) \
for ( iilru = 0 ; iilru < NUM_SLRU _BUFFERS ; iilru + + ) \
( shared ) - > page_lru_count [ iilru ] + + ; \
( shared ) - > page_lru_count [ slotno ] = 0 ; \
} \
@ -176,7 +135,6 @@ static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
SlruFlush fdata ) ;
static void SlruReportIOError ( SlruCtl ctl , int pageno , TransactionId xid ) ;
static int SlruSelectLRUPage ( SlruCtl ctl , int pageno ) ;
static bool SlruScanDirectory ( SlruCtl ctl , int cutoffPage , bool doDeletions ) ;
/*
@ -186,11 +144,12 @@ static bool SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions);
/*
 * SimpleLruShmemSize
 *
 * Returns the number of bytes of shared memory one SLRU area needs: the
 * alignment-padded SlruSharedData control struct followed by one BLCKSZ
 * page buffer per slot.  This matches how SimpleLruInit carves up the
 * area (buffers start right after the padded struct and advance by BLCKSZ
 * per slot).
 */
int
SimpleLruShmemSize ( void )
{
/*
 * NOTE(review): there are two return statements, so the second one can
 * never execute.  They differ in the alignment macro and in the
 * buffer-count constant, and both identifiers are split by a space; this
 * looks like old and new revisions interleaved -- confirm which is meant.
 */
return MAX ALIGN( sizeof ( SlruSharedData ) ) + BLCKSZ * NUM_CLOG _BUFFERS ;
return BUFFER ALIGN( sizeof ( SlruSharedData ) ) + BLCKSZ * NUM_SLRU _BUFFERS ;
}
void
SimpleLruInit ( SlruCtl ctl , const char * name , const char * subdir )
SimpleLruInit ( SlruCtl ctl , const char * name ,
LWLockId ctllock , const char * subdir )
{
SlruShared shared ;
bool found ;
@ -207,16 +166,16 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
memset ( shared , 0 , sizeof ( SlruSharedData ) ) ;
shared - > ControlLock = LWLockAssign ( ) ;
shared - > ControlLock = ctllock ;
bufptr = ( char * ) shared + MAX ALIGN( sizeof ( SlruSharedData ) ) ;
bufptr = ( char * ) shared + BUFFER ALIGN( sizeof ( SlruSharedData ) ) ;
for ( slotno = 0 ; slotno < NUM_CLOG _BUFFERS ; slotno + + )
for ( slotno = 0 ; slotno < NUM_SLRU _BUFFERS ; slotno + + )
{
shared - > page_buffer [ slotno ] = bufptr ;
shared - > page_status [ slotno ] = SLRU_PAGE_EMPTY ;
shared - > page_lru_count [ slotno ] = 1 ;
shared - > BufferL ocks[ slotno ] = LWLockAssign ( ) ;
shared - > buffer_l ocks[ slotno ] = LWLockAssign ( ) ;
bufptr + = BLCKSZ ;
}
@ -225,11 +184,12 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
else
Assert ( found ) ;
/* Initialize the unshared control struct */
/*
* Initialize the unshared control struct , including directory path .
* We assume caller set PagePrecedes .
*/
ctl - > shared = shared ;
ctl - > ControlLock = shared - > ControlLock ;
/* Initialize unshared copy of directory path */
ctl - > do_fsync = true ; /* default behavior */
snprintf ( ctl - > Dir , MAXPGPATH , " %s/%s " , DataDir , subdir ) ;
}
@ -244,8 +204,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, const char *subdir)
int
SimpleLruZeroPage ( SlruCtl ctl , int pageno )
{
int slotno ;
SlruShared shared = ctl - > shared ;
int slotno ;
/* Find a suitable buffer slot for the page */
slotno = SlruSelectLRUPage ( ctl , pageno ) ;
@ -274,14 +234,13 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno)
* The passed - in xid is used only for error reporting , and may be
* InvalidTransactionId if no specific xid is associated with the action .
*
* Return value is the shared - buffer address of the page .
* Return value is the shared - buffer slot number now holding the page .
* The buffer ' s LRU access info is updated .
* If forwrite is true , the buffer is marked as dirty .
*
* Control lock must be held at entry , and will be held at exit .
*/
char *
SimpleLruReadPage ( SlruCtl ctl , int pageno , TransactionId xid , bool forwrite )
int
SimpleLruReadPage ( SlruCtl ctl , int pageno , TransactionId xid )
{
SlruShared shared = ctl - > shared ;
@ -303,9 +262,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
{
/* otherwise, it's ready to use */
SlruRecentlyUsed ( shared , slotno ) ;
if ( forwrite )
shared - > page_status [ slotno ] = SLRU_PAGE_DIRTY ;
return shared - > page_buffer [ slotno ] ;
return slotno ;
}
}
else
@ -327,7 +284,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
/* Release shared lock, grab per-buffer lock instead */
LWLockRelease ( shared - > ControlLock ) ;
LWLockAcquire ( shared - > BufferL ocks[ slotno ] , LW_EXCLUSIVE ) ;
LWLockAcquire ( shared - > buffer_l ocks[ slotno ] , LW_EXCLUSIVE ) ;
/*
* Check to see if someone else already did the read , or took the
@ -336,7 +293,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
if ( shared - > page_number [ slotno ] ! = pageno | |
shared - > page_status [ slotno ] ! = SLRU_PAGE_READ_IN_PROGRESS )
{
LWLockRelease ( shared - > BufferL ocks[ slotno ] ) ;
LWLockRelease ( shared - > buffer_l ocks[ slotno ] ) ;
LWLockAcquire ( shared - > ControlLock , LW_EXCLUSIVE ) ;
continue ;
}
@ -352,16 +309,14 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
shared - > page_status [ slotno ] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_EMPTY ;
LWLockRelease ( shared - > BufferL ocks[ slotno ] ) ;
LWLockRelease ( shared - > buffer_l ocks[ slotno ] ) ;
/* Now it's okay to ereport if we failed */
if ( ! ok )
SlruReportIOError ( ctl , pageno , xid ) ;
SlruRecentlyUsed ( shared , slotno ) ;
if ( forwrite )
shared - > page_status [ slotno ] = SLRU_PAGE_DIRTY ;
return shared - > page_buffer [ slotno ] ;
return slotno ;
}
}
@ -379,9 +334,9 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid, bool forwrite)
void
SimpleLruWritePage ( SlruCtl ctl , int slotno , SlruFlush fdata )
{
SlruShared shared = ctl - > shared ;
int pageno ;
bool ok ;
SlruShared shared = ctl - > shared ;
/* Do nothing if page does not need writing */
if ( shared - > page_status [ slotno ] ! = SLRU_PAGE_DIRTY & &
@ -392,7 +347,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
/* Release shared lock, grab per-buffer lock instead */
LWLockRelease ( shared - > ControlLock ) ;
LWLockAcquire ( shared - > BufferL ocks[ slotno ] , LW_EXCLUSIVE ) ;
LWLockAcquire ( shared - > buffer_l ocks[ slotno ] , LW_EXCLUSIVE ) ;
/*
* Check to see if someone else already did the write , or took the
@ -405,7 +360,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
( shared - > page_status [ slotno ] ! = SLRU_PAGE_DIRTY & &
shared - > page_status [ slotno ] ! = SLRU_PAGE_WRITE_IN_PROGRESS ) )
{
LWLockRelease ( shared - > BufferL ocks[ slotno ] ) ;
LWLockRelease ( shared - > buffer_l ocks[ slotno ] ) ;
LWLockAcquire ( shared - > ControlLock , LW_EXCLUSIVE ) ;
return ;
}
@ -447,7 +402,7 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
if ( shared - > page_status [ slotno ] = = SLRU_PAGE_WRITE_IN_PROGRESS )
shared - > page_status [ slotno ] = ok ? SLRU_PAGE_CLEAN : SLRU_PAGE_DIRTY ;
LWLockRelease ( shared - > BufferL ocks[ slotno ] ) ;
LWLockRelease ( shared - > buffer_l ocks[ slotno ] ) ;
/* Now it's okay to ereport if we failed */
if ( ! ok )
@ -640,7 +595,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
*/
if ( ! fdata )
{
if ( pg_fsync ( fd ) )
if ( ctl - > do_fsync & & pg_fsync ( fd ) )
{
slru_errcause = SLRU_FSYNC_FAILED ;
slru_errno = errno ;
@ -758,7 +713,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
unsigned int bestcount = 0 ;
/* See if page already has a buffer assigned */
for ( slotno = 0 ; slotno < NUM_CLOG _BUFFERS ; slotno + + )
for ( slotno = 0 ; slotno < NUM_SLRU _BUFFERS ; slotno + + )
{
if ( shared - > page_number [ slotno ] = = pageno & &
shared - > page_status [ slotno ] ! = SLRU_PAGE_EMPTY )
@ -769,7 +724,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
* If we find any EMPTY slot , just select that one . Else locate
* the least - recently - used slot that isn ' t the latest page .
*/
for ( slotno = 0 ; slotno < NUM_CLOG _BUFFERS ; slotno + + )
for ( slotno = 0 ; slotno < NUM_SLRU _BUFFERS ; slotno + + )
{
if ( shared - > page_status [ slotno ] = = SLRU_PAGE_EMPTY )
return slotno ;
@ -795,7 +750,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
*/
if ( shared - > page_status [ bestslot ] = = SLRU_PAGE_READ_IN_PROGRESS )
( void ) SimpleLruReadPage ( ctl , shared - > page_number [ bestslot ] ,
InvalidTransactionId , false ) ;
InvalidTransactionId ) ;
else
SimpleLruWritePage ( ctl , bestslot , NULL ) ;
@ -808,18 +763,7 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
}
/*
* This must be called ONCE during postmaster or standalone - backend startup
*/
void
SimpleLruSetLatestPage(SlruCtl ctl, int pageno)
{
	/*
	 * Store pageno as the shared latest_page_number.  No lock is taken
	 * here.
	 */
	ctl->shared->latest_page_number = pageno;
}
/*
* This is called during checkpoint and postmaster / standalone - backend shutdown
* Flush dirty pages to disk during checkpoint or database shutdown
*/
void
SimpleLruFlush ( SlruCtl ctl , bool checkpoint )
@ -831,11 +775,14 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
int i ;
bool ok ;
/*
* Find and write dirty pages
*/
fdata . num_files = 0 ;
LWLockAcquire ( shared - > ControlLock , LW_EXCLUSIVE ) ;
for ( slotno = 0 ; slotno < NUM_CLOG _BUFFERS ; slotno + + )
for ( slotno = 0 ; slotno < NUM_SLRU _BUFFERS ; slotno + + )
{
SimpleLruWritePage ( ctl , slotno , & fdata ) ;
@ -857,7 +804,7 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
ok = true ;
for ( i = 0 ; i < fdata . num_files ; i + + )
{
if ( pg_fsync ( fdata . fd [ i ] ) )
if ( ctl - > do_fsync & & pg_fsync ( fdata . fd [ i ] ) )
{
slru_errcause = SLRU_FSYNC_FAILED ;
slru_errno = errno ;
@ -879,40 +826,23 @@ SimpleLruFlush(SlruCtl ctl, bool checkpoint)
/*
* Remove all segments before the one holding the passed page number
*
* When this is called , we know that the database logically contains no
* reference to transaction IDs older than oldestXact . However , we must
* not remove any segment until we have performed a checkpoint , to ensure
* that no such references remain on disk either ; else a crash just after
* the truncation might leave us with a problem . Since CLOG segments hold
* a large number of transactions , the opportunity to actually remove a
* segment is fairly rare , and so it seems best not to do the checkpoint
* unless we have confirmed that there is a removable segment . Therefore
* we issue the checkpoint command here , not in higher - level code as might
* seem cleaner .
*/
void
SimpleLruTruncate ( SlruCtl ctl , int cutoffPage )
{
int slotno ;
SlruShared shared = ctl - > shared ;
int slotno ;
/*
* The cutoff point is the start of the segment containing cutoffPage .
*/
cutoffPage - = cutoffPage % SLRU_PAGES_PER_SEGMENT ;
if ( ! SlruScanDirectory ( ctl , cutoffPage , false ) )
return ; /* nothing to remove */
/* Perform a CHECKPOINT */
RequestCheckpoint ( true ) ;
/*
* Scan shared memory and remove any pages preceding the cutoff page ,
* to ensure we won ' t rewrite them later . ( Any dirty pages should
* have been flushed already during the checkpoint , we ' re just being
* extra careful here . )
* to ensure we won ' t rewrite them later . ( Since this is normally
* called in or just after a checkpoint , any dirty pages should
* have been flushed already . . . we ' re just being extra careful here . )
*/
LWLockAcquire ( shared - > ControlLock , LW_EXCLUSIVE ) ;
@ -933,7 +863,7 @@ restart:;
return ;
}
for ( slotno = 0 ; slotno < NUM_CLOG _BUFFERS ; slotno + + )
for ( slotno = 0 ; slotno < NUM_SLRU _BUFFERS ; slotno + + )
{
if ( shared - > page_status [ slotno ] = = SLRU_PAGE_EMPTY )
continue ;
@ -956,7 +886,7 @@ restart:;
*/
if ( shared - > page_status [ slotno ] = = SLRU_PAGE_READ_IN_PROGRESS )
( void ) SimpleLruReadPage ( ctl , shared - > page_number [ slotno ] ,
InvalidTransactionId , false ) ;
InvalidTransactionId ) ;
else
SimpleLruWritePage ( ctl , slotno , NULL ) ;
goto restart ;
@ -969,11 +899,13 @@ restart:;
}
/*
* SlruTruncate subroutine : scan directory for removable segments .
* SimpleLruTruncate subroutine : scan directory for removable segments .
* Actually remove them iff doDeletions is true . Return TRUE iff any
* removable segments were found . Note : no locking is needed .
*
* This can be called directly from clog . c , for reasons explained there .
*/
static bool
bool
SlruScanDirectory ( SlruCtl ctl , int cutoffPage , bool doDeletions )
{
bool found = false ;
@ -983,6 +915,13 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
int segpage ;
char path [ MAXPGPATH ] ;
/*
* The cutoff point is the start of the segment containing cutoffPage .
* ( This is redundant when called from SimpleLruTruncate , but not when
* called directly from clog . c . )
*/
cutoffPage - = cutoffPage % SLRU_PAGES_PER_SEGMENT ;
cldir = AllocateDir ( ctl - > Dir ) ;
if ( cldir = = NULL )
ereport ( ERROR ,
@ -1003,10 +942,9 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
found = true ;
if ( doDeletions )
{
ereport ( LOG ,
( errmsg ( " removing file \" %s/%s \" " ,
ctl - > Dir , clde - > d_name ) ) ) ;
snprintf ( path , MAXPGPATH , " %s/%s " , ctl - > Dir , clde - > d_name ) ;
ereport ( LOG ,
( errmsg ( " removing file \" %s \" " , path ) ) ) ;
unlink ( path ) ;
}
}
@ -1027,55 +965,3 @@ SlruScanDirectory(SlruCtl ctl, int cutoffPage, bool doDeletions)
return found ;
}
/*
* SLRU resource manager ' s routines
*/
/*
 * slru_redo
 *
 * Replay one SLRU WAL record.  The operation code is taken from the bits of
 * xl_info outside XLR_INFO_MASK; the record data begins with an int page
 * number, which is passed to the matching *_zeropage_redo routine.  An
 * unrecognized operation code is reported at PANIC level.  "lsn" is not
 * used.
 */
void
slru_redo(XLogRecPtr lsn, XLogRecord *record)
{
	uint8		opcode = record->xl_info & ~XLR_INFO_MASK;
	int			target_page;

	/* Copy rather than cast: the record data is read byte-wise. */
	memcpy(&target_page, XLogRecGetData(record), sizeof(target_page));

	if (opcode == CLOG_ZEROPAGE)
		clog_zeropage_redo(target_page);
	else if (opcode == SUBTRANS_ZEROPAGE)
		subtrans_zeropage_redo(target_page);
	else
		elog(PANIC, " slru_redo: unknown op code %u ", opcode);
}
/*
 * slru_undo
 *
 * Undo routine for SLRU WAL records.  The body is empty: neither "lsn" nor
 * "record" is examined and nothing is changed.
 */
void
slru_undo ( XLogRecPtr lsn , XLogRecord * record )
{
}
/*
 * slru_desc
 *
 * Append a textual description of an SLRU WAL record to "buf".
 *
 * buf		NUL-terminated string to append to.  No size is passed in, so the
 *			caller has to provide enough room for the appended text.
 * xl_info	record info byte; the bits outside XLR_INFO_MASK select the
 *			operation.
 * rec		record data; for the two known operations it begins with an int
 *			page number.  It is not read for an unknown operation.
 */
void
slru_desc(char *buf, uint8 xl_info, char *rec)
{
	uint8		opcode = xl_info & ~XLR_INFO_MASK;
	int			target_page;

	switch (opcode)
	{
		case CLOG_ZEROPAGE:
			memcpy(&target_page, rec, sizeof(target_page));
			sprintf(buf + strlen(buf), " clog zeropage: %d ", target_page);
			break;

		case SUBTRANS_ZEROPAGE:
			memcpy(&target_page, rec, sizeof(target_page));
			sprintf(buf + strlen(buf), " subtrans zeropage: %d ", target_page);
			break;

		default:
			strcat(buf, " UNKNOWN ");
			break;
	}
}