@ -7,10 +7,15 @@
* ( Note that there can be more than one walsender process concurrently . )
* It is started by the postmaster when the walreceiver of a standby server
* connects to the primary server and requests XLOG streaming replication .
* It attempts to keep reading XLOG records from the disk and sending them
* to the standby server , as long as the connection is alive ( i . e . , like
* any backend , there is a one - to - one relationship between a connection
* and a walsender process ) .
*
* A walsender is similar to a regular backend , i . e . , there is a one - to - one
* relationship between a connection and a walsender process , but instead
* of processing SQL queries , it understands a small set of special
* replication - mode commands . The START_REPLICATION command begins streaming
* WAL to the client . While streaming , the walsender keeps reading XLOG
* records from the disk and sends them to the standby server over the
* COPY protocol , until either side ends the replication by exiting COPY
* mode ( or until the connection is closed ) .
*
* Normal termination is by SIGTERM , which instructs the walsender to
* close the connection and exit ( 0 ) at next convenient moment . Emergency
@ -37,6 +42,7 @@
# include <signal.h>
# include <unistd.h>
# include "access/timeline.h"
# include "access/transam.h"
# include "access/xlog_internal.h"
# include "catalog/pg_type.h"
@ -87,8 +93,6 @@ bool am_walsender = false; /* Am I a walsender process ? */
bool am_cascading_walsender = false ; /* Am I cascading WAL to
* another standby ? */
static bool replication_started = false ; /* Started streaming yet? */
/* User-settable parameters for walsender */
int max_wal_senders = 0 ; /* the maximum number of concurrent walsenders */
int wal_sender_timeout = 60 * 1000 ; /* maximum time to send one
@ -106,6 +110,16 @@ static int sendFile = -1;
static XLogSegNo sendSegNo = 0 ;
static uint32 sendOff = 0 ;
/*
* These variables keep track of the state of the timeline we ' re currently
* sending . sendTimeLine identifies the timeline . If sendTimeLineIsHistoric ,
* the timeline is not the latest timeline on this server , and the server ' s
* history forked off from that timeline at sendTimeLineValidUpto .
*/
static TimeLineID sendTimeLine = 0 ;
static bool sendTimeLineIsHistoric = false ;
static XLogRecPtr sendTimeLineValidUpto = InvalidXLogRecPtr ;
/*
* How far have we sent WAL already ? This is also advertised in
* MyWalSnd - > sentPtr . ( Actually , this is the next WAL location to send . )
@ -124,9 +138,26 @@ static TimestampTz last_reply_timestamp;
/* Have we sent a heartbeat message asking for reply, since last reply? */
static bool ping_sent = false ;
/*
* While streaming WAL in Copy mode , streamingDoneSending is set to true
* after we have sent CopyDone . We should not send any more CopyData messages
* after that . streamingDoneReceiving is set to true when we receive CopyDone
* from the other end . When both become true , it ' s time to exit Copy mode .
*/
static bool streamingDoneSending ;
static bool streamingDoneReceiving ;
/* Flags set by signal handlers for later service in main loop */
static volatile sig_atomic_t got_SIGHUP = false ;
volatile sig_atomic_t walsender_ready_to_stop = false ;
static volatile sig_atomic_t walsender_ready_to_stop = false ;
/*
* This is set while we are streaming . When not set , SIGUSR2 signal will be
* handled like SIGTERM . When set , the main loop is responsible for checking
* walsender_ready_to_stop and terminating when it ' s set ( after streaming any
* remaining WAL ) .
*/
static volatile sig_atomic_t replication_active = false ;
/* Signal handlers */
static void WalSndSigHupHandler ( SIGNAL_ARGS ) ;
@ -134,7 +165,7 @@ static void WalSndXLogSendHandler(SIGNAL_ARGS);
static void WalSndLastCycleHandler ( SIGNAL_ARGS ) ;
/* Prototypes for private functions */
static void WalSndLoop ( void ) __attribute__ ( ( noreturn ) ) ;
static void WalSndLoop ( void ) ;
static void InitWalSenderSlot ( void ) ;
static void WalSndKill ( int code , Datum arg ) ;
static void XLogSend ( bool * caughtup ) ;
@ -164,6 +195,16 @@ InitWalSender(void)
*/
if ( am_cascading_walsender )
ThisTimeLineID = GetRecoveryTargetTLI ( ) ;
/*
* Let postmaster know that we ' re a WAL sender . Once we ' ve declared us as
* a WAL sender process , postmaster will let us outlive the bgwriter and
* kill us last in the shutdown sequence , so we get a chance to stream all
* remaining WAL at shutdown , including the shutdown checkpoint . Note that
* there ' s no going back , and we mustn ' t write any WAL records after this .
*/
MarkPostmasterChildWalSender ( ) ;
SendPostmasterSignal ( PMSIGNAL_ADVANCE_STATE_MACHINE ) ;
}
/*
@ -182,17 +223,16 @@ WalSndErrorCleanup()
sendFile = - 1 ;
}
/*
* Don ' t return back to the command loop after we ' ve started replicating .
* We ' ve already marked us as an actively streaming WAL sender in the
* PMSignal slot , and there ' s currently no way to undo that .
*/
if ( replication_started )
replication_active = false ;
if ( walsender_ready_to_stop )
proc_exit ( 0 ) ;
/* Revert back to startup state */
WalSndSetState ( WALSNDSTATE_STARTUP ) ;
}
/*
* IDENTIFY_SYSTEM
* Handle the IDENTIFY_SYSTEM command .
*/
static void
IdentifySystem ( void )
@ -210,9 +250,17 @@ IdentifySystem(void)
snprintf ( sysid , sizeof ( sysid ) , UINT64_FORMAT ,
GetSystemIdentifier ( ) ) ;
snprintf ( tli , sizeof ( tli ) , " %u " , ThisTimeLineID ) ;
logptr = am_cascading_walsender ? GetStandbyFlushRecPtr ( NULL ) : GetInsertRecPtr ( ) ;
am_cascading_walsender = RecoveryInProgress ( ) ;
if ( am_cascading_walsender )
{
logptr = GetStandbyFlushRecPtr ( ) ;
ThisTimeLineID = GetRecoveryTargetTLI ( ) ;
}
else
logptr = GetInsertRecPtr ( ) ;
snprintf ( tli , sizeof ( tli ) , " %u " , ThisTimeLineID ) ;
snprintf ( xpos , sizeof ( xpos ) , " %X/%X " , ( uint32 ) ( logptr > > 32 ) , ( uint32 ) logptr ) ;
@ -261,56 +309,106 @@ IdentifySystem(void)
pq_endmessage ( & buf ) ;
}
/*
* Handle START_REPLICATION command .
*
* At the moment , this never returns , but an ereport ( ERROR ) will take us back
* to the main loop .
* Handle TIMELINE_HISTORY command .
*/
static void
StartReplication ( StartReplication Cmd * cmd )
SendTimeLineHistory ( TimeLineHistoryCmd * cmd )
{
StringInfoData buf ;
char histfname [ MAXFNAMELEN ] ;
char path [ MAXPGPATH ] ;
int fd ;
size_t histfilelen ;
size_t bytesleft ;
/*
* Let postmaster know that we ' re streaming . Once we ' ve declared us as a
* WAL sender process , postmaster will let us outlive the bgwriter and
* kill us last in the shutdown sequence , so we get a chance to stream all
* remaining WAL at shutdown , including the shutdown checkpoint . Note that
* there ' s no going back , and we mustn ' t write any WAL records after this .
* Reply with a result set with one row , and two columns . The first col
* is the name of the history file , 2 nd is the contents .
*/
MarkPostmasterChildWalSender ( ) ;
SendPostmasterSignal ( PMSIGNAL_ADVANCE_STATE_MACHINE ) ;
replication_started = true ;
/*
* When promoting a cascading standby , postmaster sends SIGUSR2 to any
* cascading walsenders to kill them . But there is a corner - case where
* such walsender fails to receive SIGUSR2 and survives a standby
* promotion unexpectedly . This happens when postmaster sends SIGUSR2
* before the walsender marks itself as a WAL sender , because postmaster
* sends SIGUSR2 to only the processes marked as a WAL sender .
*
* To avoid this corner - case , if recovery is NOT in progress even though
* the walsender is cascading one , we do the same thing as SIGUSR2 signal
* handler does , i . e . , set walsender_ready_to_stop to true . Which causes
* the walsender to end later .
*
* When terminating cascading walsenders , usually postmaster writes the
* log message announcing the terminations . But there is a race condition
* here . If there is no walsender except this process before reaching
* here , postmaster thinks that there is no walsender and suppresses that
* log message . To handle this case , we always emit that log message here .
* This might cause duplicate log messages , but which is less likely to
* happen , so it ' s not worth writing some code to suppress them .
*/
if ( am_cascading_walsender & & ! RecoveryInProgress ( ) )
TLHistoryFileName ( histfname , cmd - > timeline ) ;
TLHistoryFilePath ( path , cmd - > timeline ) ;
/* Send a RowDescription message */
pq_beginmessage ( & buf , ' T ' ) ;
pq_sendint ( & buf , 2 , 2 ) ; /* 2 fields */
/* first field */
pq_sendstring ( & buf , " filename " ) ; /* col name */
pq_sendint ( & buf , 0 , 4 ) ; /* table oid */
pq_sendint ( & buf , 0 , 2 ) ; /* attnum */
pq_sendint ( & buf , TEXTOID , 4 ) ; /* type oid */
pq_sendint ( & buf , - 1 , 2 ) ; /* typlen */
pq_sendint ( & buf , 0 , 4 ) ; /* typmod */
pq_sendint ( & buf , 0 , 2 ) ; /* format code */
/* second field */
pq_sendstring ( & buf , " content " ) ; /* col name */
pq_sendint ( & buf , 0 , 4 ) ; /* table oid */
pq_sendint ( & buf , 0 , 2 ) ; /* attnum */
pq_sendint ( & buf , BYTEAOID , 4 ) ; /* type oid */
pq_sendint ( & buf , - 1 , 2 ) ; /* typlen */
pq_sendint ( & buf , 0 , 4 ) ; /* typmod */
pq_sendint ( & buf , 0 , 2 ) ; /* format code */
pq_endmessage ( & buf ) ;
/* Send a DataRow message */
pq_beginmessage ( & buf , ' D ' ) ;
pq_sendint ( & buf , 2 , 2 ) ; /* # of columns */
pq_sendint ( & buf , strlen ( histfname ) , 4 ) ; /* col1 len */
pq_sendbytes ( & buf , histfname , strlen ( histfname ) ) ;
fd = OpenTransientFile ( path , O_RDONLY | PG_BINARY , 0666 ) ;
if ( fd < 0 )
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not open file \" %s \" : %m " , path ) ) ) ;
/* Determine file length and send it to client */
histfilelen = lseek ( fd , 0 , SEEK_END ) ;
if ( histfilelen < 0 )
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not seek to end of file \" %s \" : %m " , path ) ) ) ;
if ( lseek ( fd , 0 , SEEK_SET ) ! = 0 )
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not seek to beginning of file \" %s \" : %m " , path ) ) ) ;
pq_sendint ( & buf , histfilelen , 4 ) ; /* col2 len */
bytesleft = histfilelen ;
while ( bytesleft > 0 )
{
ereport ( LOG ,
( errmsg ( " terminating walsender process to force cascaded standby "
" to update timeline and reconnect " ) ) ) ;
walsender_ready_to_stop = true ;
char rbuf [ BLCKSZ ] ;
int nread ;
nread = read ( fd , rbuf , sizeof ( rbuf ) ) ;
if ( nread < = 0 )
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not read file \" %s \" : %m " ,
path ) ) ) ;
pq_sendbytes ( & buf , rbuf , nread ) ;
bytesleft - = nread ;
}
CloseTransientFile ( fd ) ;
pq_endmessage ( & buf ) ;
}
/*
* Handle START_REPLICATION command .
*
* This returns once streaming has ended and we have left COPY mode ; an
* ereport ( ERROR ) will take us back to the main loop .
*/
static void
StartReplication ( StartReplicationCmd * cmd )
{
StringInfoData buf ;
/*
* We assume here that we ' re logging enough information in the WAL for
@ -322,42 +420,144 @@ StartReplication(StartReplicationCmd *cmd)
*/
/*
* When we first start replication the standby will be behind the primary .
* For some applications , for example , synchronous replication , it is
* important to have a clear state for this initial catchup mode , so we
* can trigger actions when we change streaming state later . We may stay
* in this state for a long time , which is exactly why we want to be able
* to monitor whether or not we are still here .
* Select the timeline . If it was given explicitly by the client , use
* that . Otherwise use the current ThisTimeLineID .
*/
WalSndSetState ( WALSNDSTATE_CATCHUP ) ;
if ( cmd - > timeline ! = 0 )
{
XLogRecPtr switchpoint ;
/* Send a CopyBothResponse message, and start streaming */
pq_beginmessage ( & buf , ' W ' ) ;
pq_sendbyte ( & buf , 0 ) ;
pq_sendint ( & buf , 0 , 2 ) ;
pq_endmessage ( & buf ) ;
pq_flush ( ) ;
sendTimeLine = cmd - > timeline ;
if ( sendTimeLine = = ThisTimeLineID )
{
sendTimeLineIsHistoric = false ;
sendTimeLineValidUpto = InvalidXLogRecPtr ;
}
else
{
List * timeLineHistory ;
/*
* Initialize position to the received one , then the xlog records begin to
* be shipped from that position
*/
sentPtr = cmd - > startpoint ;
sendTimeLineIsHistoric = true ;
/* Also update the start position status in shared memory */
{
/* use volatile pointer to prevent code rearrangement */
volatile WalSnd * walsnd = MyWalSnd ;
/*
* Check that the timeline the client requested exists , and the
* requested start location is on that timeline .
*/
timeLineHistory = readTimeLineHistory ( ThisTimeLineID ) ;
switchpoint = tliSwitchPoint ( cmd - > timeline , timeLineHistory ) ;
list_free_deep ( timeLineHistory ) ;
SpinLockAcquire ( & walsnd - > mutex ) ;
walsnd - > sentPtr = sentPtr ;
SpinLockRelease ( & walsnd - > mutex ) ;
/*
* Found the requested timeline in the history . Check that
* requested startpoint is on that timeline in our history .
*
* This is quite loose on purpose . We only check that we didn ' t
* fork off the requested timeline before the switchpoint . We don ' t
* check that we switched * to * it before the requested starting
* point . This is because the client can legitimately request to
* start replication from the beginning of the WAL segment that
* contains switchpoint , but on the new timeline , so that it
* doesn ' t end up with a partial segment . If you ask for a too old
* starting point , you ' ll get an error later when we fail to find
* the requested WAL segment in pg_xlog .
*
* XXX : we could be more strict here and only allow a startpoint
* that ' s older than the switchpoint , if it ' s still in the same
* WAL segment .
*/
if ( ! XLogRecPtrIsInvalid ( switchpoint ) & &
XLByteLT ( switchpoint , cmd - > startpoint ) )
{
ereport ( ERROR ,
( errmsg ( " requested starting point %X/%X on timeline %u is not in this server's history " ,
( uint32 ) ( cmd - > startpoint > > 32 ) ,
( uint32 ) ( cmd - > startpoint ) ,
cmd - > timeline ) ,
errdetail ( " This server's history forked from timeline %u at %X/%X " ,
cmd - > timeline ,
( uint32 ) ( switchpoint > > 32 ) ,
( uint32 ) ( switchpoint ) ) ) ) ;
}
sendTimeLineValidUpto = switchpoint ;
}
}
else
{
sendTimeLine = ThisTimeLineID ;
sendTimeLineValidUpto = InvalidXLogRecPtr ;
sendTimeLineIsHistoric = false ;
}
SyncRepInitConfig ( ) ;
streamingDoneSending = streamingDoneReceiving = false ;
/* If there is nothing to stream, don't even enter COPY mode */
if ( ! sendTimeLineIsHistoric | |
XLByteLT ( cmd - > startpoint , sendTimeLineValidUpto ) )
{
XLogRecPtr FlushPtr ;
/*
* When we first start replication the standby will be behind the primary .
* For some applications , for example , synchronous replication , it is
* important to have a clear state for this initial catchup mode , so we
* can trigger actions when we change streaming state later . We may stay
* in this state for a long time , which is exactly why we want to be able
* to monitor whether or not we are still here .
*/
WalSndSetState ( WALSNDSTATE_CATCHUP ) ;
/* Send a CopyBothResponse message, and start streaming */
pq_beginmessage ( & buf , ' W ' ) ;
pq_sendbyte ( & buf , 0 ) ;
pq_sendint ( & buf , 0 , 2 ) ;
pq_endmessage ( & buf ) ;
pq_flush ( ) ;
/*
* Don ' t allow a request to stream from a future point in WAL that
* hasn ' t been flushed to disk in this server yet .
*/
if ( am_cascading_walsender )
FlushPtr = GetStandbyFlushRecPtr ( ) ;
else
FlushPtr = GetFlushRecPtr ( ) ;
if ( XLByteLT ( FlushPtr , cmd - > startpoint ) )
{
ereport ( ERROR ,
( errmsg ( " requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X " ,
( uint32 ) ( cmd - > startpoint > > 32 ) ,
( uint32 ) ( cmd - > startpoint ) ,
( uint32 ) ( FlushPtr > > 32 ) ,
( uint32 ) ( FlushPtr ) ) ) ) ;
}
/* Start streaming from the requested point */
sentPtr = cmd - > startpoint ;
/* Main loop of walsender */
WalSndLoop ( ) ;
/* Initialize shared memory status, too */
{
/* use volatile pointer to prevent code rearrangement */
volatile WalSnd * walsnd = MyWalSnd ;
SpinLockAcquire ( & walsnd - > mutex ) ;
walsnd - > sentPtr = sentPtr ;
SpinLockRelease ( & walsnd - > mutex ) ;
}
SyncRepInitConfig ( ) ;
/* Main loop of walsender */
replication_active = true ;
WalSndLoop ( ) ;
replication_active = false ;
if ( walsender_ready_to_stop )
proc_exit ( 0 ) ;
WalSndSetState ( WALSNDSTATE_STARTUP ) ;
}
/* Get out of COPY mode (CommandComplete). */
EndCommand ( " COPY 0 " , DestRemote ) ;
}
/*
@ -406,10 +606,13 @@ exec_replication_command(const char *cmd_string)
SendBaseBackup ( ( BaseBackupCmd * ) cmd_node ) ;
break ;
case T_TimeLineHistoryCmd :
SendTimeLineHistory ( ( TimeLineHistoryCmd * ) cmd_node ) ;
break ;
default :
ereport ( FATAL ,
( errcode ( ERRCODE_PROTOCOL_VIOLATION ) ,
errmsg ( " invalid standby query string: %s " , cmd_string ) ) ) ;
elog ( ERROR , " unrecognized replication command node tag: %u " ,
cmd_node - > type ) ;
}
/* done */
@ -421,7 +624,8 @@ exec_replication_command(const char *cmd_string)
}
/*
* Check if the remote end has closed the connection .
* Process any incoming messages while streaming . Also checks if the remote
* end has closed the connection .
*/
static void
ProcessRepliesIfAny ( void )
@ -430,7 +634,12 @@ ProcessRepliesIfAny(void)
int r ;
bool received = false ;
for ( ; ; )
/*
* If we already received a CopyDone from the frontend , any subsequent
* message is the beginning of a new command , and should be processed in
* the main processing loop .
*/
while ( ! streamingDoneReceiving )
{
r = pq_getbyte_if_available ( & firstchar ) ;
if ( r < 0 )
@ -458,6 +667,31 @@ ProcessRepliesIfAny(void)
received = true ;
break ;
/*
* CopyDone means the standby requested to finish streaming .
* Reply with CopyDone , if we had not sent that already .
*/
case ' c ' :
if ( ! streamingDoneSending )
{
pq_putmessage_noblock ( ' c ' , NULL , 0 ) ;
streamingDoneSending = true ;
}
/* consume the CopyDone message */
resetStringInfo ( & reply_message ) ;
if ( pq_getmessage ( & reply_message , 0 ) )
{
ereport ( COMMERROR ,
( errcode ( ERRCODE_PROTOCOL_VIOLATION ) ,
errmsg ( " unexpected EOF on standby connection " ) ) ) ;
proc_exit ( 0 ) ;
}
streamingDoneReceiving = true ;
received = true ;
break ;
/*
* ' X ' means that the standby is closing down the socket .
*/
@ -666,7 +900,10 @@ WalSndLoop(void)
last_reply_timestamp = GetCurrentTimestamp ( ) ;
ping_sent = false ;
/* Loop forever, unless we get an error */
/*
* Loop until we reach the end of this timeline or the client requests
* to stop streaming .
*/
for ( ; ; )
{
/* Clear any already-pending wakeups */
@ -692,6 +929,14 @@ WalSndLoop(void)
/* Check for input from the client */
ProcessRepliesIfAny ( ) ;
/*
* If we have received CopyDone from the client , sent CopyDone
* ourselves , and the output buffer is empty , it ' s time to exit
* streaming .
*/
if ( ! pq_is_send_pending ( ) & & streamingDoneSending & & streamingDoneReceiving )
break ;
/*
* If we don ' t have any pending data in the output buffer , try to send
* some more . If there is some , we don ' t bother to call XLogSend
@ -705,7 +950,7 @@ WalSndLoop(void)
/* Try to flush pending output to the client */
if ( pq_flush_if_writable ( ) ! = 0 )
break ;
goto send_failure ;
/* If nothing remains to be sent right now ... */
if ( caughtup & & ! pq_is_send_pending ( ) )
@ -739,7 +984,7 @@ WalSndLoop(void)
if ( caughtup & & ! pq_is_send_pending ( ) )
{
/* Inform the standby that XLOG streaming is done */
pq_puttextmessage ( ' C ' , " COPY 0 " ) ;
EndCommand ( " COPY 0 " , DestRemote ) ;
pq_flush ( ) ;
proc_exit ( 0 ) ;
@ -754,14 +999,16 @@ WalSndLoop(void)
* loaded a subset of the available data but then pq_flush_if_writable
* flushed it all - - - we should immediately try to send more .
*/
if ( caughtup | | pq_is_send_pending ( ) )
if ( ( caughtup & & ! streamingDoneSending ) | | pq_is_send_pending ( ) )
{
TimestampTz timeout = 0 ;
long sleeptime = 10000 ; /* 10 s */
int wakeEvents ;
wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
WL_SOCKET_READABLE | WL_TIMEOUT ;
wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT ;
if ( ! streamingDoneReceiving )
wakeEvents | = WL_SOCKET_READABLE ;
if ( pq_is_send_pending ( ) )
wakeEvents | = WL_SOCKET_WRITEABLE ;
@ -813,11 +1060,13 @@ WalSndLoop(void)
*/
ereport ( COMMERROR ,
( errmsg ( " terminating walsender process due to replication timeout " ) ) ) ;
break ;
goto send_failure ;
}
}
}
return ;
send_failure :
/*
* Get here on send failure . Clean up and exit .
*
@ -916,7 +1165,7 @@ WalSndKill(int code, Datum arg)
* more than one .
*/
void
XLogRead ( char * buf , XLogRecPtr startptr , Size count )
XLogRead ( char * buf , TimeLineID tli , XLogRecPtr startptr , Size count )
{
char * p ;
XLogRecPtr recptr ;
@ -937,7 +1186,7 @@ retry:
startoff = recptr % XLogSegSize ;
if ( sendFile < 0 | | ! XLByteInSeg ( recptr , sendSegNo ) )
if ( sendFile < 0 | | ! XLByteInSeg ( recptr , sendSegNo ) | | sendTimeLine ! = tli )
{
char path [ MAXPGPATH ] ;
@ -945,8 +1194,9 @@ retry:
if ( sendFile > = 0 )
close ( sendFile ) ;
sendTimeLine = tli ;
XLByteToSeg ( recptr , sendSegNo ) ;
XLogFilePath ( path , Thi sTimeLineID , sendSegNo ) ;
XLogFilePath ( path , send TimeLine , sendSegNo ) ;
sendFile = BasicOpenFile ( path , O_RDONLY | PG_BINARY , 0 ) ;
if ( sendFile < 0 )
@ -960,7 +1210,7 @@ retry:
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " requested WAL segment %s has already been removed " ,
XLogFileNameP ( Thi sTimeLineID , sendSegNo ) ) ) ) ;
XLogFileNameP ( send TimeLine , sendSegNo ) ) ) ) ;
else
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
@ -977,7 +1227,7 @@ retry:
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not seek in log segment %s to offset %u: %m " ,
XLogFileNameP ( Thi sTimeLineID , sendSegNo ) ,
XLogFileNameP ( send TimeLine , sendSegNo ) ,
startoff ) ) ) ;
sendOff = startoff ;
}
@ -994,7 +1244,7 @@ retry:
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " could not read from log segment %s, offset %u, length %lu: %m " ,
XLogFileNameP ( Thi sTimeLineID , sendSegNo ) ,
XLogFileNameP ( send TimeLine , sendSegNo ) ,
sendOff , ( unsigned long ) segbytes ) ) ) ;
}
@ -1019,7 +1269,7 @@ retry:
ereport ( ERROR ,
( errcode_for_file_access ( ) ,
errmsg ( " requested WAL segment %s has already been removed " ,
XLogFileNameP ( Thi sTimeLineID , segno ) ) ) ) ;
XLogFileNameP ( send TimeLine , segno ) ) ) ) ;
/*
* During recovery , the currently - open WAL file might be replaced with the
@ -1060,10 +1310,17 @@ static void
XLogSend ( bool * caughtup )
{
XLogRecPtr SendRqstPtr ;
XLogRecPtr FlushPtr ;
XLogRecPtr startptr ;
XLogRecPtr endptr ;
Size nbytes ;
if ( streamingDoneSending )
{
* caughtup = true ;
return ;
}
/*
* Attempt to send all data that ' s already been written out and fsync ' d to
* disk . We cannot go further than what ' s been written out given the
@ -1073,32 +1330,103 @@ XLogSend(bool *caughtup)
* that gets lost on the master .
*/
if ( am_cascading_walsender )
FlushPtr = GetStandbyFlushRecPtr ( ) ;
else
FlushPtr = GetFlushRecPtr ( ) ;
/*
* In a cascading standby , the current recovery target timeline can
* change , or we can be promoted . In either case , the current timeline
* becomes historic . We need to detect that so that we don ' t try to stream
* past the point where we switched to another timeline . It ' s checked
* after calculating FlushPtr , to avoid a race condition : if the timeline
* becomes historic just after we checked that it was still current , it
* should still be OK to stream it up to the FlushPtr that was calculated
* before it became historic .
*/
if ( ! sendTimeLineIsHistoric & & am_cascading_walsender )
{
TimeLineID currentTargetTLI ;
SendRqstPtr = GetStandbyFlushRecPtr ( & currentTargetTLI ) ;
bool becameHistoric = false ;
TimeLineID targetTLI ;
/*
* If the recovery target timeline changed , bail out . It ' s a bit
* unfortunate that we have to just disconnect , but there is no way
* to tell the client that the timeline changed . We also don ' t know
* exactly where the switch happened , so we cannot safely try to send
* up to the switchover point before disconnecting .
*/
if ( currentTargetTLI ! = ThisTimeLineID )
if ( ! RecoveryInProgress ( ) )
{
if ( ! walsender_ready_to_stop )
ereport ( LOG ,
( errmsg ( " terminating walsender process to force cascaded standby "
" to update timeline and reconnect " ) ) ) ;
walsender_ready_to_stop = true ;
* caughtup = true ;
return ;
/*
* We have been promoted . RecoveryInProgress ( ) updated
* ThisTimeLineID to the new current timeline .
*/
targetTLI = ThisTimeLineID ;
am_cascading_walsender = false ;
becameHistoric = true ;
}
else
{
/*
* Still a cascading standby . But is the timeline we ' re sending
* still the recovery target timeline ?
*/
targetTLI = GetRecoveryTargetTLI ( ) ;
if ( targetTLI ! = sendTimeLine )
becameHistoric = true ;
}
if ( becameHistoric )
{
/*
* The timeline we were sending has become historic . Read the
* timeline history file of the new timeline to see where exactly
* we forked off from the timeline we were sending .
*/
List * history ;
history = readTimeLineHistory ( targetTLI ) ;
sendTimeLineValidUpto = tliSwitchPoint ( sendTimeLine , history ) ;
Assert ( XLByteLE ( sentPtr , sendTimeLineValidUpto ) ) ;
list_free_deep ( history ) ;
/* the switchpoint should be >= current send pointer */
if ( ! XLByteLE ( sentPtr , sendTimeLineValidUpto ) )
elog ( ERROR , " server switched off timeline %u at %X/%X, but walsender already streamed up to %X/%X " ,
sendTimeLine ,
( uint32 ) ( sendTimeLineValidUpto > > 32 ) ,
( uint32 ) sendTimeLineValidUpto ,
( uint32 ) ( sentPtr > > 32 ) ,
( uint32 ) sentPtr ) ;
sendTimeLineIsHistoric = true ;
}
}
/*
* If this is a historic timeline and we ' ve reached the point where we
* forked to the next timeline , stop streaming .
*/
if ( sendTimeLineIsHistoric & & XLByteLE ( sendTimeLineValidUpto , sentPtr ) )
{
/* close the current file. */
if ( sendFile > = 0 )
close ( sendFile ) ;
sendFile = - 1 ;
/* Send CopyDone */
pq_putmessage_noblock ( ' c ' , NULL , 0 ) ;
streamingDoneSending = true ;
* caughtup = true ;
return ;
}
/*
* Stream up to the point known to be flushed to disk , or to the end of
* this timeline , whichever comes first .
*/
if ( sendTimeLineIsHistoric & & XLByteLT ( sendTimeLineValidUpto , FlushPtr ) )
SendRqstPtr = sendTimeLineValidUpto ;
else
SendRqstPtr = GetFlushRecPtr ( ) ;
SendRqstPtr = FlushPtr ;
/* Quick exit if nothing to do */
Assert ( XLByteLE ( sentPtr , SendRqstPtr ) ) ;
if ( XLByteLE ( SendRqstPtr , sentPtr ) )
{
* caughtup = true ;
@ -1124,7 +1452,10 @@ XLogSend(bool *caughtup)
if ( XLByteLE ( SendRqstPtr , endptr ) )
{
endptr = SendRqstPtr ;
* caughtup = true ;
if ( sendTimeLineIsHistoric )
* caughtup = false ;
else
* caughtup = true ;
}
else
{
@ -1151,7 +1482,7 @@ XLogSend(bool *caughtup)
* calls .
*/
enlargeStringInfo ( & output_message , nbytes ) ;
XLogRead ( & output_message . data [ output_message . len ] , startptr , nbytes ) ;
XLogRead ( & output_message . data [ output_message . len ] , sendTimeLine , s tartptr , nbytes ) ;
output_message . len + = nbytes ;
output_message . data [ output_message . len ] = ' \0 ' ;
@ -1242,6 +1573,14 @@ WalSndLastCycleHandler(SIGNAL_ARGS)
{
int save_errno = errno ;
/*
* If replication has not yet started , die like with SIGTERM . If
* replication is active , only set a flag and wake up the main loop . It
* will send any outstanding WAL , and then exit gracefully .
*/
if ( ! replication_active )
kill ( MyProcPid , SIGTERM ) ;
walsender_ready_to_stop = true ;
if ( MyWalSnd )
SetLatch ( & MyWalSnd - > latch ) ;