|
|
@ -403,7 +403,8 @@ typedef struct XLogCtlData |
|
|
|
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ |
|
|
|
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ |
|
|
|
TransactionId ckptXid; |
|
|
|
TransactionId ckptXid; |
|
|
|
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */ |
|
|
|
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */ |
|
|
|
XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG segment */ |
|
|
|
XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG
|
|
|
|
|
|
|
|
* segment */ |
|
|
|
|
|
|
|
|
|
|
|
/* Fake LSN counter, for unlogged relations. Protected by ulsn_lck */ |
|
|
|
/* Fake LSN counter, for unlogged relations. Protected by ulsn_lck */ |
|
|
|
XLogRecPtr unloggedLSN; |
|
|
|
XLogRecPtr unloggedLSN; |
|
|
@ -555,7 +556,7 @@ typedef enum |
|
|
|
} XLogSource; |
|
|
|
} XLogSource; |
|
|
|
|
|
|
|
|
|
|
|
/* human-readable names for XLogSources, for debugging output */ |
|
|
|
/* human-readable names for XLogSources, for debugging output */ |
|
|
|
static const char *xlogSourceNames[] = { "any", "archive", "pg_xlog", "stream" }; |
|
|
|
static const char *xlogSourceNames[] = {"any", "archive", "pg_xlog", "stream"}; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* openLogFile is -1 or a kernel FD for an open log file segment. |
|
|
|
* openLogFile is -1 or a kernel FD for an open log file segment. |
|
|
@ -1251,10 +1252,10 @@ XLogCheckBuffer(XLogRecData *rdata, bool holdsExclusiveLock, |
|
|
|
page = BufferGetPage(rdata->buffer); |
|
|
|
page = BufferGetPage(rdata->buffer); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* We assume page LSN is first data on *every* page that can be passed |
|
|
|
* We assume page LSN is first data on *every* page that can be passed to |
|
|
|
* to XLogInsert, whether it has the standard page layout or not. We |
|
|
|
* XLogInsert, whether it has the standard page layout or not. We don't |
|
|
|
* don't need to take the buffer header lock for PageGetLSN if we hold |
|
|
|
* need to take the buffer header lock for PageGetLSN if we hold an |
|
|
|
* an exclusive lock on the page and/or the relation. |
|
|
|
* exclusive lock on the page and/or the relation. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (holdsExclusiveLock) |
|
|
|
if (holdsExclusiveLock) |
|
|
|
*lsn = PageGetLSN(page); |
|
|
|
*lsn = PageGetLSN(page); |
|
|
@ -1858,7 +1859,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force) |
|
|
|
if (!force && newMinRecoveryPoint < lsn) |
|
|
|
if (!force && newMinRecoveryPoint < lsn) |
|
|
|
elog(WARNING, |
|
|
|
elog(WARNING, |
|
|
|
"xlog min recovery request %X/%X is past current point %X/%X", |
|
|
|
"xlog min recovery request %X/%X is past current point %X/%X", |
|
|
|
(uint32) (lsn >> 32) , (uint32) lsn, |
|
|
|
(uint32) (lsn >> 32), (uint32) lsn, |
|
|
|
(uint32) (newMinRecoveryPoint >> 32), |
|
|
|
(uint32) (newMinRecoveryPoint >> 32), |
|
|
|
(uint32) newMinRecoveryPoint); |
|
|
|
(uint32) newMinRecoveryPoint); |
|
|
|
|
|
|
|
|
|
|
@ -1979,8 +1980,8 @@ XLogFlush(XLogRecPtr record) |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Sleep before flush! By adding a delay here, we may give further |
|
|
|
* Sleep before flush! By adding a delay here, we may give further |
|
|
|
* backends the opportunity to join the backlog of group commit |
|
|
|
* backends the opportunity to join the backlog of group commit |
|
|
|
* followers; this can significantly improve transaction throughput, at |
|
|
|
* followers; this can significantly improve transaction throughput, |
|
|
|
* the risk of increasing transaction latency. |
|
|
|
* at the risk of increasing transaction latency. |
|
|
|
* |
|
|
|
* |
|
|
|
* We do not sleep if enableFsync is not turned on, nor if there are |
|
|
|
* We do not sleep if enableFsync is not turned on, nor if there are |
|
|
|
* fewer than CommitSiblings other backends with active transactions. |
|
|
|
* fewer than CommitSiblings other backends with active transactions. |
|
|
@ -3215,8 +3216,8 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* The checksum value on this page is currently invalid. We don't |
|
|
|
* The checksum value on this page is currently invalid. We don't need to |
|
|
|
* need to reset it here since it will be set before being written. |
|
|
|
* reset it here since it will be set before being written. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
PageSetLSN(page, lsn); |
|
|
|
PageSetLSN(page, lsn); |
|
|
@ -3272,16 +3273,17 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* We only end up here without a message when XLogPageRead() failed |
|
|
|
* We only end up here without a message when XLogPageRead() |
|
|
|
* - in that case we already logged something. |
|
|
|
* failed - in that case we already logged something. In |
|
|
|
* In StandbyMode that only happens if we have been triggered, so |
|
|
|
* StandbyMode that only happens if we have been triggered, so we |
|
|
|
* we shouldn't loop anymore in that case. |
|
|
|
* shouldn't loop anymore in that case. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (errormsg) |
|
|
|
if (errormsg) |
|
|
|
ereport(emode_for_corrupt_record(emode, |
|
|
|
ereport(emode_for_corrupt_record(emode, |
|
|
|
RecPtr ? RecPtr : EndRecPtr), |
|
|
|
RecPtr ? RecPtr : EndRecPtr), |
|
|
|
(errmsg_internal("%s", errormsg) /* already translated */)); |
|
|
|
(errmsg_internal("%s", errormsg) /* already translated */ )); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Check page TLI is one of the expected values. |
|
|
|
* Check page TLI is one of the expected values. |
|
|
|
*/ |
|
|
|
*/ |
|
|
@ -3314,10 +3316,10 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, |
|
|
|
lastSourceFailed = true; |
|
|
|
lastSourceFailed = true; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If archive recovery was requested, but we were still doing crash |
|
|
|
* If archive recovery was requested, but we were still doing |
|
|
|
* recovery, switch to archive recovery and retry using the offline |
|
|
|
* crash recovery, switch to archive recovery and retry using the |
|
|
|
* archive. We have now replayed all the valid WAL in pg_xlog, so |
|
|
|
* offline archive. We have now replayed all the valid WAL in |
|
|
|
* we are presumably now consistent. |
|
|
|
* pg_xlog, so we are presumably now consistent. |
|
|
|
* |
|
|
|
* |
|
|
|
* We require that there's at least some valid WAL present in |
|
|
|
* We require that there's at least some valid WAL present in |
|
|
|
* pg_xlog, however (!fetch_ckpt). We could recover using the WAL |
|
|
|
* pg_xlog, however (!fetch_ckpt). We could recover using the WAL |
|
|
@ -3401,11 +3403,11 @@ rescanLatestTimeLine(void) |
|
|
|
newExpectedTLEs = readTimeLineHistory(newtarget); |
|
|
|
newExpectedTLEs = readTimeLineHistory(newtarget); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If the current timeline is not part of the history of the new |
|
|
|
* If the current timeline is not part of the history of the new timeline, |
|
|
|
* timeline, we cannot proceed to it. |
|
|
|
* we cannot proceed to it. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
found = false; |
|
|
|
found = false; |
|
|
|
foreach (cell, newExpectedTLEs) |
|
|
|
foreach(cell, newExpectedTLEs) |
|
|
|
{ |
|
|
|
{ |
|
|
|
currentTle = (TimeLineHistoryEntry *) lfirst(cell); |
|
|
|
currentTle = (TimeLineHistoryEntry *) lfirst(cell); |
|
|
|
|
|
|
|
|
|
|
@ -4998,8 +5000,8 @@ StartupXLOG(void) |
|
|
|
&backupFromStandby)) |
|
|
|
&backupFromStandby)) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Archive recovery was requested, and thanks to the backup label file, |
|
|
|
* Archive recovery was requested, and thanks to the backup label |
|
|
|
* we know how far we need to replay to reach consistency. Enter |
|
|
|
* file, we know how far we need to replay to reach consistency. Enter |
|
|
|
* archive recovery directly. |
|
|
|
* archive recovery directly. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
InArchiveRecovery = true; |
|
|
|
InArchiveRecovery = true; |
|
|
@ -5049,8 +5051,8 @@ StartupXLOG(void) |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* It's possible that archive recovery was requested, but we don't |
|
|
|
* It's possible that archive recovery was requested, but we don't |
|
|
|
* know how far we need to replay the WAL before we reach consistency. |
|
|
|
* know how far we need to replay the WAL before we reach consistency. |
|
|
|
* This can happen for example if a base backup is taken from a running |
|
|
|
* This can happen for example if a base backup is taken from a |
|
|
|
* server using an atomic filesystem snapshot, without calling |
|
|
|
* running server using an atomic filesystem snapshot, without calling |
|
|
|
* pg_start/stop_backup. Or if you just kill a running master server |
|
|
|
* pg_start/stop_backup. Or if you just kill a running master server |
|
|
|
* and put it into archive recovery by creating a recovery.conf file. |
|
|
|
* and put it into archive recovery by creating a recovery.conf file. |
|
|
|
* |
|
|
|
* |
|
|
@ -5058,8 +5060,8 @@ StartupXLOG(void) |
|
|
|
* replaying all the WAL present in pg_xlog, and only enter archive |
|
|
|
* replaying all the WAL present in pg_xlog, and only enter archive |
|
|
|
* recovery after that. |
|
|
|
* recovery after that. |
|
|
|
* |
|
|
|
* |
|
|
|
* But usually we already know how far we need to replay the WAL (up to |
|
|
|
* But usually we already know how far we need to replay the WAL (up |
|
|
|
* minRecoveryPoint, up to backupEndPoint, or until we see an |
|
|
|
* to minRecoveryPoint, up to backupEndPoint, or until we see an |
|
|
|
* end-of-backup record), and we can enter archive recovery directly. |
|
|
|
* end-of-backup record), and we can enter archive recovery directly. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (ArchiveRecoveryRequested && |
|
|
|
if (ArchiveRecoveryRequested && |
|
|
@ -5119,15 +5121,16 @@ StartupXLOG(void) |
|
|
|
* timeline in the history of the requested timeline, we cannot proceed: |
|
|
|
* timeline in the history of the requested timeline, we cannot proceed: |
|
|
|
* the backup is not part of the history of the requested timeline. |
|
|
|
* the backup is not part of the history of the requested timeline. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
Assert(expectedTLEs); /* was initialized by reading checkpoint record */ |
|
|
|
Assert(expectedTLEs); /* was initialized by reading checkpoint
|
|
|
|
|
|
|
|
* record */ |
|
|
|
if (tliOfPointInHistory(checkPointLoc, expectedTLEs) != |
|
|
|
if (tliOfPointInHistory(checkPointLoc, expectedTLEs) != |
|
|
|
checkPoint.ThisTimeLineID) |
|
|
|
checkPoint.ThisTimeLineID) |
|
|
|
{ |
|
|
|
{ |
|
|
|
XLogRecPtr switchpoint; |
|
|
|
XLogRecPtr switchpoint; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* tliSwitchPoint will throw an error if the checkpoint's timeline |
|
|
|
* tliSwitchPoint will throw an error if the checkpoint's timeline is |
|
|
|
* is not in expectedTLEs at all. |
|
|
|
* not in expectedTLEs at all. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL); |
|
|
|
switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL); |
|
|
|
ereport(FATAL, |
|
|
|
ereport(FATAL, |
|
|
@ -5206,16 +5209,16 @@ StartupXLOG(void) |
|
|
|
ThisTimeLineID = checkPoint.ThisTimeLineID; |
|
|
|
ThisTimeLineID = checkPoint.ThisTimeLineID; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Copy any missing timeline history files between 'now' and the |
|
|
|
* Copy any missing timeline history files between 'now' and the recovery |
|
|
|
* recovery target timeline from archive to pg_xlog. While we don't need |
|
|
|
* target timeline from archive to pg_xlog. While we don't need those |
|
|
|
* those files ourselves - the history file of the recovery target |
|
|
|
* files ourselves - the history file of the recovery target timeline |
|
|
|
* timeline covers all the previous timelines in the history too - a |
|
|
|
* covers all the previous timelines in the history too - a cascading |
|
|
|
* cascading standby server might be interested in them. Or, if you |
|
|
|
* standby server might be interested in them. Or, if you archive the WAL |
|
|
|
* archive the WAL from this server to a different archive than the |
|
|
|
* from this server to a different archive than the master, it'd be good |
|
|
|
* master, it'd be good for all the history files to get archived there |
|
|
|
* for all the history files to get archived there after failover, so that |
|
|
|
* after failover, so that you can use one of the old timelines as a |
|
|
|
* you can use one of the old timelines as a PITR target. Timeline history |
|
|
|
* PITR target. Timeline history files are small, so it's better to copy |
|
|
|
* files are small, so it's better to copy them unnecessarily than not |
|
|
|
* them unnecessarily than not copy them and regret later. |
|
|
|
* copy them and regret later. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI); |
|
|
|
restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI); |
|
|
|
|
|
|
|
|
|
|
@ -5517,6 +5520,7 @@ StartupXLOG(void) |
|
|
|
do |
|
|
|
do |
|
|
|
{ |
|
|
|
{ |
|
|
|
bool switchedTLI = false; |
|
|
|
bool switchedTLI = false; |
|
|
|
|
|
|
|
|
|
|
|
#ifdef WAL_DEBUG |
|
|
|
#ifdef WAL_DEBUG |
|
|
|
if (XLOG_DEBUG || |
|
|
|
if (XLOG_DEBUG || |
|
|
|
(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || |
|
|
|
(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || |
|
|
@ -5598,13 +5602,13 @@ StartupXLOG(void) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Before replaying this record, check if this record |
|
|
|
* Before replaying this record, check if this record causes |
|
|
|
* causes the current timeline to change. The record is |
|
|
|
* the current timeline to change. The record is already |
|
|
|
* already considered to be part of the new timeline, |
|
|
|
* considered to be part of the new timeline, so we update |
|
|
|
* so we update ThisTimeLineID before replaying it. |
|
|
|
* ThisTimeLineID before replaying it. That's important so |
|
|
|
* That's important so that replayEndTLI, which is |
|
|
|
* that replayEndTLI, which is recorded as the minimum |
|
|
|
* recorded as the minimum recovery point's TLI if |
|
|
|
* recovery point's TLI if recovery stops after this record, |
|
|
|
* recovery stops after this record, is set correctly. |
|
|
|
* is set correctly. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (record->xl_rmid == RM_XLOG_ID) |
|
|
|
if (record->xl_rmid == RM_XLOG_ID) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -5952,8 +5956,9 @@ StartupXLOG(void) |
|
|
|
* allows some extra error checking in xlog_redo. |
|
|
|
* allows some extra error checking in xlog_redo. |
|
|
|
* |
|
|
|
* |
|
|
|
* In fast promotion, only create a lightweight end-of-recovery record |
|
|
|
* In fast promotion, only create a lightweight end-of-recovery record |
|
|
|
* instead of a full checkpoint. A checkpoint is requested later, after |
|
|
|
* instead of a full checkpoint. A checkpoint is requested later, |
|
|
|
* we're fully out of recovery mode and already accepting queries. |
|
|
|
* after we're fully out of recovery mode and already accepting |
|
|
|
|
|
|
|
* queries. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (bgwriterLaunched) |
|
|
|
if (bgwriterLaunched) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -5972,14 +5977,15 @@ StartupXLOG(void) |
|
|
|
fast_promoted = true; |
|
|
|
fast_promoted = true; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Insert a special WAL record to mark the end of recovery, |
|
|
|
* Insert a special WAL record to mark the end of |
|
|
|
* since we aren't doing a checkpoint. That means that the |
|
|
|
* recovery, since we aren't doing a checkpoint. That |
|
|
|
* checkpointer process may likely be in the middle of a |
|
|
|
* means that the checkpointer process may likely be in |
|
|
|
* time-smoothed restartpoint and could continue to be for |
|
|
|
* the middle of a time-smoothed restartpoint and could |
|
|
|
* minutes after this. That sounds strange, but the effect |
|
|
|
* continue to be for minutes after this. That sounds |
|
|
|
* is roughly the same and it would be stranger to try to |
|
|
|
* strange, but the effect is roughly the same and it |
|
|
|
* come out of the restartpoint and then checkpoint. |
|
|
|
* would be stranger to try to come out of the |
|
|
|
* We request a checkpoint later anyway, just for safety. |
|
|
|
* restartpoint and then checkpoint. We request a |
|
|
|
|
|
|
|
* checkpoint later anyway, just for safety. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
CreateEndOfRecoveryRecord(); |
|
|
|
CreateEndOfRecoveryRecord(); |
|
|
|
} |
|
|
|
} |
|
|
@ -6092,8 +6098,8 @@ StartupXLOG(void) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If there were cascading standby servers connected to us, nudge any |
|
|
|
* If there were cascading standby servers connected to us, nudge any wal |
|
|
|
* wal sender processes to notice that we've been promoted. |
|
|
|
* sender processes to notice that we've been promoted. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
WalSndWakeup(); |
|
|
|
WalSndWakeup(); |
|
|
|
|
|
|
|
|
|
|
@ -6151,9 +6157,9 @@ CheckRecoveryConsistency(void) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Have we passed our safe starting point? Note that minRecoveryPoint |
|
|
|
* Have we passed our safe starting point? Note that minRecoveryPoint is |
|
|
|
* is known to be incorrectly set if ControlFile->backupEndRequired, |
|
|
|
* known to be incorrectly set if ControlFile->backupEndRequired, until |
|
|
|
* until the XLOG_BACKUP_RECORD arrives to advise us of the correct |
|
|
|
* the XLOG_BACKUP_RECORD arrives to advise us of the correct |
|
|
|
* minRecoveryPoint. All we know prior to that is that we're not |
|
|
|
* minRecoveryPoint. All we know prior to that is that we're not |
|
|
|
* consistent yet. |
|
|
|
* consistent yet. |
|
|
|
*/ |
|
|
|
*/ |
|
|
@ -6946,8 +6952,8 @@ CreateCheckPoint(int flags) |
|
|
|
TRACE_POSTGRESQL_CHECKPOINT_START(flags); |
|
|
|
TRACE_POSTGRESQL_CHECKPOINT_START(flags); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* In some cases there are groups of actions that must all occur on |
|
|
|
* In some cases there are groups of actions that must all occur on one |
|
|
|
* one side or the other of a checkpoint record. Before flushing the |
|
|
|
* side or the other of a checkpoint record. Before flushing the |
|
|
|
* checkpoint record we must explicitly wait for any backend currently |
|
|
|
* checkpoint record we must explicitly wait for any backend currently |
|
|
|
* performing those groups of actions. |
|
|
|
* performing those groups of actions. |
|
|
|
* |
|
|
|
* |
|
|
@ -7211,8 +7217,8 @@ CreateEndOfRecoveryRecord(void) |
|
|
|
XLogFlush(recptr); |
|
|
|
XLogFlush(recptr); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Update the control file so that crash recovery can follow |
|
|
|
* Update the control file so that crash recovery can follow the timeline |
|
|
|
* the timeline changes to this point. |
|
|
|
* changes to this point. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); |
|
|
|
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); |
|
|
|
ControlFile->time = (pg_time_t) xlrec.end_time; |
|
|
|
ControlFile->time = (pg_time_t) xlrec.end_time; |
|
|
@ -7458,7 +7464,8 @@ CreateRestartPoint(int flags) |
|
|
|
XLogRecPtr endptr; |
|
|
|
XLogRecPtr endptr; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Get the current end of xlog replayed or received, whichever is later. |
|
|
|
* Get the current end of xlog replayed or received, whichever is |
|
|
|
|
|
|
|
* later. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
receivePtr = GetWalRcvWriteRecPtr(NULL, NULL); |
|
|
|
receivePtr = GetWalRcvWriteRecPtr(NULL, NULL); |
|
|
|
replayPtr = GetXLogReplayRecPtr(NULL); |
|
|
|
replayPtr = GetXLogReplayRecPtr(NULL); |
|
|
@ -7468,8 +7475,8 @@ CreateRestartPoint(int flags) |
|
|
|
_logSegNo--; |
|
|
|
_logSegNo--; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Update ThisTimeLineID to the timeline we're currently replaying, |
|
|
|
* Update ThisTimeLineID to the timeline we're currently replaying, so |
|
|
|
* so that we install any recycled segments on that timeline. |
|
|
|
* that we install any recycled segments on that timeline. |
|
|
|
* |
|
|
|
* |
|
|
|
* There is no guarantee that the WAL segments will be useful on the |
|
|
|
* There is no guarantee that the WAL segments will be useful on the |
|
|
|
* current timeline; if recovery proceeds to a new timeline right |
|
|
|
* current timeline; if recovery proceeds to a new timeline right |
|
|
@ -7480,8 +7487,8 @@ CreateRestartPoint(int flags) |
|
|
|
* It's possible or perhaps even likely that we finish recovery while |
|
|
|
* It's possible or perhaps even likely that we finish recovery while |
|
|
|
* a restartpoint is in progress. That means we may get to this point |
|
|
|
* a restartpoint is in progress. That means we may get to this point |
|
|
|
* some minutes afterwards. Setting ThisTimeLineID at that time would |
|
|
|
* some minutes afterwards. Setting ThisTimeLineID at that time would |
|
|
|
* actually set it backwards, so we don't want that to persist; if |
|
|
|
* actually set it backwards, so we don't want that to persist; if we |
|
|
|
* we do reset it here, make sure to reset it back afterwards. This |
|
|
|
* do reset it here, make sure to reset it back afterwards. This |
|
|
|
* doesn't look very clean or principled, but it's the best of about |
|
|
|
* doesn't look very clean or principled, but it's the best of about |
|
|
|
* five different ways of handling this edge case. |
|
|
|
* five different ways of handling this edge case. |
|
|
|
*/ |
|
|
|
*/ |
|
|
@ -7693,8 +7700,8 @@ XLogSaveBufferForHint(Buffer buffer) |
|
|
|
GetRedoRecPtr(); |
|
|
|
GetRedoRecPtr(); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Setup phony rdata element for use within XLogCheckBuffer only. |
|
|
|
* Setup phony rdata element for use within XLogCheckBuffer only. We reuse |
|
|
|
* We reuse and reset rdata for any actual WAL record insert. |
|
|
|
* and reset rdata for any actual WAL record insert. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
rdata[0].buffer = buffer; |
|
|
|
rdata[0].buffer = buffer; |
|
|
|
rdata[0].buffer_std = true; |
|
|
|
rdata[0].buffer_std = true; |
|
|
@ -7861,10 +7868,10 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI) |
|
|
|
ereport(PANIC, |
|
|
|
ereport(PANIC, |
|
|
|
(errmsg("unexpected prev timeline ID %u (current timeline ID %u) in checkpoint record", |
|
|
|
(errmsg("unexpected prev timeline ID %u (current timeline ID %u) in checkpoint record", |
|
|
|
prevTLI, ThisTimeLineID))); |
|
|
|
prevTLI, ThisTimeLineID))); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* The new timeline better be in the list of timelines we expect |
|
|
|
* The new timeline better be in the list of timelines we expect to see, |
|
|
|
* to see, according to the timeline history. It should also not |
|
|
|
* according to the timeline history. It should also not decrease. |
|
|
|
* decrease. |
|
|
|
|
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs)) |
|
|
|
if (newTLI < ThisTimeLineID || !tliInHistory(newTLI, expectedTLEs)) |
|
|
|
ereport(PANIC, |
|
|
|
ereport(PANIC, |
|
|
@ -7872,14 +7879,13 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI) |
|
|
|
newTLI, ThisTimeLineID))); |
|
|
|
newTLI, ThisTimeLineID))); |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If we have not yet reached min recovery point, and we're about |
|
|
|
* If we have not yet reached min recovery point, and we're about to |
|
|
|
* to switch to a timeline greater than the timeline of the min |
|
|
|
* switch to a timeline greater than the timeline of the min recovery |
|
|
|
* recovery point: trouble. After switching to the new timeline, |
|
|
|
* point: trouble. After switching to the new timeline, we could not |
|
|
|
* we could not possibly visit the min recovery point on the |
|
|
|
* possibly visit the min recovery point on the correct timeline anymore. |
|
|
|
* correct timeline anymore. This can happen if there is a newer |
|
|
|
* This can happen if there is a newer timeline in the archive that |
|
|
|
* timeline in the archive that branched before the timeline the |
|
|
|
* branched before the timeline the min recovery point is on, and you |
|
|
|
* min recovery point is on, and you attempt to do PITR to the |
|
|
|
* attempt to do PITR to the new timeline. |
|
|
|
* new timeline. |
|
|
|
|
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (!XLogRecPtrIsInvalid(minRecoveryPoint) && |
|
|
|
if (!XLogRecPtrIsInvalid(minRecoveryPoint) && |
|
|
|
lsn < minRecoveryPoint && |
|
|
|
lsn < minRecoveryPoint && |
|
|
@ -8105,17 +8111,17 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) |
|
|
|
BkpBlock bkpb; |
|
|
|
BkpBlock bkpb; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Hint bit records contain a backup block stored "inline" in the normal |
|
|
|
* Hint bit records contain a backup block stored "inline" in the |
|
|
|
* data since the locking when writing hint records isn't sufficient to |
|
|
|
* normal data since the locking when writing hint records isn't |
|
|
|
* use the normal backup block mechanism, which assumes exclusive lock |
|
|
|
* sufficient to use the normal backup block mechanism, which assumes |
|
|
|
* on the buffer supplied. |
|
|
|
* exclusive lock on the buffer supplied. |
|
|
|
* |
|
|
|
* |
|
|
|
* Since the only changes in these backup blocks are hint bits, there are |
|
|
|
* Since the only changes in these backup blocks are hint bits, there |
|
|
|
* no recovery conflicts generated. |
|
|
|
* are no recovery conflicts generated. |
|
|
|
* |
|
|
|
* |
|
|
|
* This also means there is no corresponding API call for this, |
|
|
|
* This also means there is no corresponding API call for this, so an |
|
|
|
* so an smgr implementation has no need to implement anything. |
|
|
|
* smgr implementation has no need to implement anything. Which means |
|
|
|
* Which means nothing is needed in md.c etc. |
|
|
|
* nothing is needed in md.c etc. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
data = XLogRecGetData(record); |
|
|
|
data = XLogRecGetData(record); |
|
|
|
memcpy(&bkpb, data, sizeof(BkpBlock)); |
|
|
|
memcpy(&bkpb, data, sizeof(BkpBlock)); |
|
|
@ -8379,6 +8385,7 @@ char * |
|
|
|
XLogFileNameP(TimeLineID tli, XLogSegNo segno) |
|
|
|
XLogFileNameP(TimeLineID tli, XLogSegNo segno) |
|
|
|
{ |
|
|
|
{ |
|
|
|
char *result = palloc(MAXFNAMELEN); |
|
|
|
char *result = palloc(MAXFNAMELEN); |
|
|
|
|
|
|
|
|
|
|
|
XLogFileName(result, tli, segno); |
|
|
|
XLogFileName(result, tli, segno); |
|
|
|
return result; |
|
|
|
return result; |
|
|
|
} |
|
|
|
} |
|
|
@ -9571,11 +9578,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
break; |
|
|
|
break; |
|
|
|
|
|
|
|
|
|
|
|
case XLOG_FROM_PG_XLOG: |
|
|
|
case XLOG_FROM_PG_XLOG: |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Check to see if the trigger file exists. Note that we do |
|
|
|
* Check to see if the trigger file exists. Note that we |
|
|
|
* this only after failure, so when you create the trigger |
|
|
|
* do this only after failure, so when you create the |
|
|
|
* file, we still finish replaying as much as we can from |
|
|
|
* trigger file, we still finish replaying as much as we |
|
|
|
* archive and pg_xlog before failover. |
|
|
|
* can from archive and pg_xlog before failover. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (StandbyMode && CheckForStandbyTrigger()) |
|
|
|
if (StandbyMode && CheckForStandbyTrigger()) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -9584,15 +9592,15 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Not in standby mode, and we've now tried the archive and |
|
|
|
* Not in standby mode, and we've now tried the archive |
|
|
|
* pg_xlog. |
|
|
|
* and pg_xlog. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (!StandbyMode) |
|
|
|
if (!StandbyMode) |
|
|
|
return false; |
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* If primary_conninfo is set, launch walreceiver to try to |
|
|
|
* If primary_conninfo is set, launch walreceiver to try |
|
|
|
* stream the missing WAL. |
|
|
|
* to stream the missing WAL. |
|
|
|
* |
|
|
|
* |
|
|
|
* If fetching_ckpt is TRUE, RecPtr points to the initial |
|
|
|
* If fetching_ckpt is TRUE, RecPtr points to the initial |
|
|
|
* checkpoint location. In that case, we use RedoStartLSN |
|
|
|
* checkpoint location. In that case, we use RedoStartLSN |
|
|
@ -9624,28 +9632,32 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
RequestXLogStreaming(tli, ptr, PrimaryConnInfo); |
|
|
|
RequestXLogStreaming(tli, ptr, PrimaryConnInfo); |
|
|
|
receivedUpto = 0; |
|
|
|
receivedUpto = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Move to XLOG_FROM_STREAM state in either case. We'll get |
|
|
|
* Move to XLOG_FROM_STREAM state in either case. We'll |
|
|
|
* immediate failure if we didn't launch walreceiver, and |
|
|
|
* get immediate failure if we didn't launch walreceiver, |
|
|
|
* move on to the next state. |
|
|
|
* and move on to the next state. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
currentSource = XLOG_FROM_STREAM; |
|
|
|
currentSource = XLOG_FROM_STREAM; |
|
|
|
break; |
|
|
|
break; |
|
|
|
|
|
|
|
|
|
|
|
case XLOG_FROM_STREAM: |
|
|
|
case XLOG_FROM_STREAM: |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Failure while streaming. Most likely, we got here because |
|
|
|
* Failure while streaming. Most likely, we got here |
|
|
|
* streaming replication was terminated, or promotion was |
|
|
|
* because streaming replication was terminated, or |
|
|
|
* triggered. But we also get here if we find an invalid |
|
|
|
* promotion was triggered. But we also get here if we |
|
|
|
* record in the WAL streamed from master, in which case |
|
|
|
* find an invalid record in the WAL streamed from master, |
|
|
|
* something is seriously wrong. There's little chance that |
|
|
|
* in which case something is seriously wrong. There's |
|
|
|
* the problem will just go away, but PANIC is not good for |
|
|
|
* little chance that the problem will just go away, but |
|
|
|
* availability either, especially in hot standby mode. So, |
|
|
|
* PANIC is not good for availability either, especially |
|
|
|
* we treat that the same as disconnection, and retry from |
|
|
|
* in hot standby mode. So, we treat that the same as |
|
|
|
* archive/pg_xlog again. The WAL in the archive should be |
|
|
|
* disconnection, and retry from archive/pg_xlog again. |
|
|
|
* identical to what was streamed, so it's unlikely that it |
|
|
|
* The WAL in the archive should be identical to what was |
|
|
|
* helps, but one can hope... |
|
|
|
* streamed, so it's unlikely that it helps, but one can |
|
|
|
|
|
|
|
* hope... |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Before we leave XLOG_FROM_STREAM state, make sure that |
|
|
|
* Before we leave XLOG_FROM_STREAM state, make sure that |
|
|
|
* walreceiver is not active, so that it won't overwrite |
|
|
|
* walreceiver is not active, so that it won't overwrite |
|
|
@ -9668,11 +9680,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* XLOG_FROM_STREAM is the last state in our state machine, |
|
|
|
* XLOG_FROM_STREAM is the last state in our state |
|
|
|
* so we've exhausted all the options for obtaining the |
|
|
|
* machine, so we've exhausted all the options for |
|
|
|
* requested WAL. We're going to loop back and retry from |
|
|
|
* obtaining the requested WAL. We're going to loop back |
|
|
|
* the archive, but if it hasn't been long since last |
|
|
|
* and retry from the archive, but if it hasn't been long |
|
|
|
* attempt, sleep 5 seconds to avoid busy-waiting. |
|
|
|
* since last attempt, sleep 5 seconds to avoid |
|
|
|
|
|
|
|
* busy-waiting. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
now = (pg_time_t) time(NULL); |
|
|
|
now = (pg_time_t) time(NULL); |
|
|
|
if ((now - last_fail_time) < 5) |
|
|
|
if ((now - last_fail_time) < 5) |
|
|
@ -9691,9 +9704,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
else if (currentSource == XLOG_FROM_PG_XLOG) |
|
|
|
else if (currentSource == XLOG_FROM_PG_XLOG) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* We just successfully read a file in pg_xlog. We prefer files |
|
|
|
* We just successfully read a file in pg_xlog. We prefer files in |
|
|
|
* in the archive over ones in pg_xlog, so try the next file |
|
|
|
* the archive over ones in pg_xlog, so try the next file again |
|
|
|
* again from the archive first. |
|
|
|
* from the archive first. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (InArchiveRecovery) |
|
|
|
if (InArchiveRecovery) |
|
|
|
currentSource = XLOG_FROM_ARCHIVE; |
|
|
|
currentSource = XLOG_FROM_ARCHIVE; |
|
|
@ -9754,13 +9767,14 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Walreceiver is active, so see if new data has arrived. |
|
|
|
* Walreceiver is active, so see if new data has arrived. |
|
|
|
* |
|
|
|
* |
|
|
|
* We only advance XLogReceiptTime when we obtain fresh WAL |
|
|
|
* We only advance XLogReceiptTime when we obtain fresh |
|
|
|
* from walreceiver and observe that we had already processed |
|
|
|
* WAL from walreceiver and observe that we had already |
|
|
|
* everything before the most recent "chunk" that it flushed to |
|
|
|
* processed everything before the most recent "chunk" |
|
|
|
* disk. In steady state where we are keeping up with the |
|
|
|
* that it flushed to disk. In steady state where we are |
|
|
|
* incoming data, XLogReceiptTime will be updated on each cycle. |
|
|
|
* keeping up with the incoming data, XLogReceiptTime will |
|
|
|
* When we are behind, XLogReceiptTime will not advance, so the |
|
|
|
* be updated on each cycle. When we are behind, |
|
|
|
* grace time allotted to conflicting queries will decrease. |
|
|
|
* XLogReceiptTime will not advance, so the grace time |
|
|
|
|
|
|
|
* allotted to conflicting queries will decrease. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (RecPtr < receivedUpto) |
|
|
|
if (RecPtr < receivedUpto) |
|
|
|
havedata = true; |
|
|
|
havedata = true; |
|
|
@ -9784,12 +9798,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
if (havedata) |
|
|
|
if (havedata) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Great, streamed far enough. Open the file if it's not |
|
|
|
* Great, streamed far enough. Open the file if it's |
|
|
|
* open already. Also read the timeline history file if |
|
|
|
* not open already. Also read the timeline history |
|
|
|
* we haven't initialized timeline history yet; it should |
|
|
|
* file if we haven't initialized timeline history |
|
|
|
* be streamed over and present in pg_xlog by now. Use |
|
|
|
* yet; it should be streamed over and present in |
|
|
|
* XLOG_FROM_STREAM so that source info is set correctly |
|
|
|
* pg_xlog by now. Use XLOG_FROM_STREAM so that |
|
|
|
* and XLogReceiptTime isn't changed. |
|
|
|
* source info is set correctly and XLogReceiptTime |
|
|
|
|
|
|
|
* isn't changed. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (readFile < 0) |
|
|
|
if (readFile < 0) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -9818,20 +9833,21 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, |
|
|
|
{ |
|
|
|
{ |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Note that we don't "return false" immediately here. |
|
|
|
* Note that we don't "return false" immediately here. |
|
|
|
* After being triggered, we still want to replay all the |
|
|
|
* After being triggered, we still want to replay all |
|
|
|
* WAL that was already streamed. It's in pg_xlog now, so |
|
|
|
* the WAL that was already streamed. It's in pg_xlog |
|
|
|
* we just treat this as a failure, and the state machine |
|
|
|
* now, so we just treat this as a failure, and the |
|
|
|
* will move on to replay the streamed WAL from pg_xlog, |
|
|
|
* state machine will move on to replay the streamed |
|
|
|
* and then recheck the trigger and exit replay. |
|
|
|
* WAL from pg_xlog, and then recheck the trigger and |
|
|
|
|
|
|
|
* exit replay. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
lastSourceFailed = true; |
|
|
|
lastSourceFailed = true; |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* Wait for more WAL to arrive. Time out after 5 seconds, like |
|
|
|
* Wait for more WAL to arrive. Time out after 5 seconds, |
|
|
|
* when polling the archive, to react to a trigger file |
|
|
|
* like when polling the archive, to react to a trigger |
|
|
|
* promptly. |
|
|
|
* file promptly. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
WaitLatch(&XLogCtl->recoveryWakeupLatch, |
|
|
|
WaitLatch(&XLogCtl->recoveryWakeupLatch, |
|
|
|
WL_LATCH_SET | WL_TIMEOUT, |
|
|
|
WL_LATCH_SET | WL_TIMEOUT, |
|
|
@ -9903,11 +9919,10 @@ CheckForStandbyTrigger(void) |
|
|
|
if (IsPromoteTriggered()) |
|
|
|
if (IsPromoteTriggered()) |
|
|
|
{ |
|
|
|
{ |
|
|
|
/*
|
|
|
|
/*
|
|
|
|
* In 9.1 and 9.2 the postmaster unlinked the promote file |
|
|
|
* In 9.1 and 9.2 the postmaster unlinked the promote file inside the |
|
|
|
* inside the signal handler. We now leave the file in place |
|
|
|
* signal handler. We now leave the file in place and let the Startup |
|
|
|
* and let the Startup process do the unlink. This allows |
|
|
|
* process do the unlink. This allows Startup to know whether we're |
|
|
|
* Startup to know whether we're doing fast or normal |
|
|
|
* doing fast or normal promotion. Fast promotion takes precedence. |
|
|
|
* promotion. Fast promotion takes precedence. |
|
|
|
|
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0) |
|
|
|
if (stat(FAST_PROMOTE_SIGNAL_FILE, &stat_buf) == 0) |
|
|
|
{ |
|
|
|
{ |
|
|
|