@ -63,22 +63,33 @@
snprintf ( path , MAXPGPATH , " %s/%04X " , ( ctl ) - > Dir , seg )
/*
* During SimpleLruFlush ( ) , we will usually not need to write / fsync more
* than one or two physical files , but we may need to write several pages
* per file . We can consolidate the I / O requests by leaving files open
* until control returns to SimpleLruFlush ( ) . This data structure remembers
* which files are open .
* During SimpleLruWriteAll ( ) , we will usually not need to write more than one
* or two physical files , but we may need to write several pages per file . We
* can consolidate the I / O requests by leaving files open until control returns
* to SimpleLruWriteAll ( ) . This data structure remembers which files are open .
*/
# define MAX_FLUSH _BUFFERS 16
# define MAX_WRITEALL _BUFFERS 16
typedef struct SlruFlush Data
typedef struct SlruWriteAll Data
{
int num_files ; /* # files actually open */
int fd [ MAX_FLUSH _BUFFERS ] ; /* their FD's */
int segno [ MAX_FLUSH _BUFFERS ] ; /* their log seg#s */
} SlruFlush Data ;
int fd [ MAX_WRITEALL _BUFFERS ] ; /* their FD's */
int segno [ MAX_WRITEALL _BUFFERS ] ; /* their log seg#s */
} SlruWriteAll Data ;
typedef struct SlruFlushData * SlruFlush ;
typedef struct SlruWriteAllData * SlruWriteAll ;
/*
 * Populate a file tag describing a segment file.  We only use the segment
 * number, since we can derive everything else we need by having separate
 * sync handler functions for clog, multixact etc.
 *
 * The whole FileTag is zeroed first, then only the handler and segno fields
 * are filled in, so every other field of the tag is left as zero.
 * (Presumably this keeps tags for the same segment byte-wise identical --
 * confirm against how sync.c compares tags.)
 *
 * Note multiple evaluation of the "a" argument: it is expanded three times,
 * so it must be a side-effect-free lvalue of type FileTag.  The macro is a
 * single comma expression whose value is the assigned segment number.
 */
# define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
( \
memset ( & ( a ) , 0 , sizeof ( FileTag ) ) , \
( a ) . handler = ( xx_handler ) , \
( a ) . segno = ( xx_segno ) \
)
/*
* Macro to mark a buffer slot " most recently used " . Note multiple evaluation
@ -125,10 +136,10 @@ static int slru_errno;
static void SimpleLruZeroLSNs ( SlruCtl ctl , int slotno ) ;
static void SimpleLruWaitIO ( SlruCtl ctl , int slotno ) ;
static void SlruInternalWritePage ( SlruCtl ctl , int slotno , SlruFlush fdata ) ;
static void SlruInternalWritePage ( SlruCtl ctl , int slotno , SlruWriteAll fdata ) ;
static bool SlruPhysicalReadPage ( SlruCtl ctl , int pageno , int slotno ) ;
static bool SlruPhysicalWritePage ( SlruCtl ctl , int pageno , int slotno ,
SlruFlush fdata ) ;
SlruWriteAll fdata ) ;
static void SlruReportIOError ( SlruCtl ctl , int pageno , TransactionId xid ) ;
static int SlruSelectLRUPage ( SlruCtl ctl , int pageno ) ;
@ -173,7 +184,8 @@ SimpleLruShmemSize(int nslots, int nlsns)
*/
void
SimpleLruInit ( SlruCtl ctl , const char * name , int nslots , int nlsns ,
LWLock * ctllock , const char * subdir , int tranche_id )
LWLock * ctllock , const char * subdir , int tranche_id ,
SyncRequestHandler sync_handler )
{
SlruShared shared ;
bool found ;
@ -251,7 +263,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
* assume caller set PagePrecedes .
*/
ctl - > shared = shared ;
ctl - > do_fsync = true ; /* default behavior */
ctl - > sync_handler = sync_handler ;
strlcpy ( ctl - > Dir , subdir , sizeof ( ctl - > Dir ) ) ;
}
@ -523,7 +535,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
* Control lock must be held at entry , and will be held at exit .
*/
static void
SlruInternalWritePage ( SlruCtl ctl , int slotno , SlruFlush fdata )
SlruInternalWritePage ( SlruCtl ctl , int slotno , SlruWriteAll fdata )
{
SlruShared shared = ctl - > shared ;
int pageno = shared - > page_number [ slotno ] ;
@ -587,6 +599,10 @@ SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
/* Now it's okay to ereport if we failed */
if ( ! ok )
SlruReportIOError ( ctl , pageno , InvalidTransactionId ) ;
/* If part of a checkpoint, count this as a buffer written. */
if ( fdata )
CheckpointStats . ckpt_bufs_written + + ;
}
/*
@ -730,13 +746,13 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
*
* For now , assume it ' s not worth keeping a file pointer open across
* independent read / write operations . We do batch operations during
* SimpleLruFlush , though .
* SimpleLruWriteAll , though .
*
* fdata is NULL for a standalone write , pointer to open - file info during
* SimpleLruFlush .
* SimpleLruWriteAll .
*/
static bool
SlruPhysicalWritePage ( SlruCtl ctl , int pageno , int slotno , SlruFlush fdata )
SlruPhysicalWritePage ( SlruCtl ctl , int pageno , int slotno , SlruWriteAll fdata )
{
SlruShared shared = ctl - > shared ;
int segno = pageno / SLRU_PAGES_PER_SEGMENT ;
@ -791,7 +807,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
/*
* During a Flush , we may already have the desired file open .
* During a WriteAll , we may already have the desired file open .
*/
if ( fdata )
{
@ -837,7 +853,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
if ( fdata )
{
if ( fdata - > num_files < MAX_FLUSH _BUFFERS )
if ( fdata - > num_files < MAX_WRITEALL _BUFFERS )
{
fdata - > fd [ fdata - > num_files ] = fd ;
fdata - > segno [ fdata - > num_files ] = segno ;
@ -870,23 +886,31 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
pgstat_report_wait_end ( ) ;
/*
* If not part of Flush , need to fsync now . We assume this happens
* infrequently enough that it ' s not a performance issue .
*/
if ( ! fdata )
/* Queue up a sync request for the checkpointer. */
if ( ctl - > sync_handler ! = SYNC_HANDLER_NONE )
{
pgstat_report_wait_start ( WAIT_EVENT_SLRU_SYNC ) ;
if ( ctl - > do_fsync & & pg_fsync ( fd ) ! = 0 )
FileTag tag ;
INIT_SLRUFILETAG ( tag , ctl - > sync_handler , segno ) ;
if ( ! RegisterSyncRequest ( & tag , SYNC_REQUEST , false ) )
{
/* No space to enqueue sync request. Do it synchronously. */
pgstat_report_wait_start ( WAIT_EVENT_SLRU_SYNC ) ;
if ( pg_fsync ( fd ) ! = 0 )
{
pgstat_report_wait_end ( ) ;
slru_errcause = SLRU_FSYNC_FAILED ;
slru_errno = errno ;
CloseTransientFile ( fd ) ;
return false ;
}
pgstat_report_wait_end ( ) ;
slru_errcause = SLRU_FSYNC_FAILED ;
slru_errno = errno ;
CloseTransientFile ( fd ) ;
return false ;
}
pgstat_report_wait_end ( ) ;
}
/* Close file, unless part of flush request. */
if ( ! fdata )
{
if ( CloseTransientFile ( fd ) ! = 0 )
{
slru_errcause = SLRU_CLOSE_FAILED ;
@ -1122,13 +1146,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
}
/*
* Flush dirty pages to disk during checkpoint or database shutdown
* Write dirty pages to disk during checkpoint or database shutdown . Flushing
* is deferred until the next call to ProcessSyncRequests ( ) , though we do fsync
* the containing directory here to make sure that newly created directory
* entries are on disk .
*/
void
SimpleLruFlush ( SlruCtl ctl , bool allow_redirtied )
SimpleLruWriteAll ( SlruCtl ctl , bool allow_redirtied )
{
SlruShared shared = ctl - > shared ;
SlruFlush Data fdata ;
SlruWriteAll Data fdata ;
int slotno ;
int pageno = 0 ;
int i ;
@ -1162,21 +1189,11 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
LWLockRelease ( shared - > ControlLock ) ;
/*
* Now fsync and close any files that were open
* Now close any files that were open
*/
ok = true ;
for ( i = 0 ; i < fdata . num_files ; i + + )
{
pgstat_report_wait_start ( WAIT_EVENT_SLRU_FLUSH_SYNC ) ;
if ( ctl - > do_fsync & & pg_fsync ( fdata . fd [ i ] ) ! = 0 )
{
slru_errcause = SLRU_FSYNC_FAILED ;
slru_errno = errno ;
pageno = fdata . segno [ i ] * SLRU_PAGES_PER_SEGMENT ;
ok = false ;
}
pgstat_report_wait_end ( ) ;
if ( CloseTransientFile ( fdata . fd [ i ] ) ! = 0 )
{
slru_errcause = SLRU_CLOSE_FAILED ;
@ -1189,7 +1206,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
SlruReportIOError ( ctl , pageno , InvalidTransactionId ) ;
/* Ensure that directory entries for new files are on disk. */
if ( ctl - > do_fsync )
if ( ctl - > sync_handler ! = SYNC_HANDLER_NONE )
fsync_fname ( ctl - > Dir , true ) ;
}
@ -1350,6 +1367,19 @@ restart:
snprintf ( path , MAXPGPATH , " %s/%04X " , ctl - > Dir , segno ) ;
ereport ( DEBUG2 ,
( errmsg ( " removing file \" %s \" " , path ) ) ) ;
/*
* Tell the checkpointer to forget any sync requests , before we unlink the
* file .
*/
if ( ctl - > sync_handler ! = SYNC_HANDLER_NONE )
{
FileTag tag ;
INIT_SLRUFILETAG ( tag , ctl - > sync_handler , segno ) ;
RegisterSyncRequest ( & tag , SYNC_FORGET_REQUEST , true ) ;
}
unlink ( path ) ;
LWLockRelease ( shared - > ControlLock ) ;
@ -1448,3 +1478,31 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
return retval ;
}
/*
 * Common fsync implementation shared by the per-SLRU sync.c handlers.
 *
 * Each individual SLRU (clog, ...) must supply its own handler function,
 * because only it knows which "SlruCtl" to use (and without that we cannot
 * build the segment's path).  Those handlers simply forward to this routine.
 *
 * The name of the segment identified by ftag->segno is written into the
 * caller-supplied "path" buffer, and that file is then opened and fsync'd.
 *
 * Returns -1 if the file could not be opened; otherwise returns whatever
 * pg_fsync() returned.  In the latter case errno is restored to the value it
 * had right after pg_fsync(), so that closing the file cannot clobber it.
 */
int
SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
{
	int			fd;

	/* Work out which segment file this tag refers to. */
	SlruFileName(ctl, path, ftag->segno);

	fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
	if (fd >= 0)
	{
		int			sync_rc = pg_fsync(fd);
		int			sync_errno = errno;

		/*
		 * The close result is intentionally not examined; the caller is told
		 * about the fsync outcome and its errno only.
		 */
		CloseTransientFile(fd);

		errno = sync_errno;
		return sync_rc;
	}

	/* Open failed; errno is as left by OpenTransientFile(). */
	return -1;
}