mirror of https://github.com/postgres/postgres
pg_standby was useful more than a decade ago, but now it is obsolete. It has been proposed that we retire it many times. Now seems like a good time to finally do it, because "waiting restore commands" are incompatible with a proposed recovery prefetching feature. Discussion: https://postgr.es/m/20201029024412.GP5380%40telsasoft.com Author: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Reviewed-by: Michael Paquier <michael@paquier.xyz> Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> pull/61/head
parent
1046dbedde
commit
514b411a2b
@ -1 +0,0 @@ |
|||||||
/pg_standby |
|
@ -1,20 +0,0 @@ |
|||||||
# contrib/pg_standby/Makefile
#
# Builds the pg_standby program.  With USE_PGXS defined the build uses the
# PGXS makefile reported by pg_config; otherwise it uses the makefiles of
# the enclosing source tree.

PGFILEDESC = "pg_standby - supports creation of a warm standby"
PGAPPICON = win32

PROGRAM = pg_standby
OBJS = \
	$(WIN32RES) \
	pg_standby.o

ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/pg_standby
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
|
@ -1,907 +0,0 @@ |
|||||||
/*
|
|
||||||
* contrib/pg_standby/pg_standby.c |
|
||||||
* |
|
||||||
* |
|
||||||
* pg_standby.c |
|
||||||
* |
|
||||||
* Production-ready example of how to create a Warm Standby |
|
||||||
* database server using continuous archiving as a |
|
||||||
* replication mechanism |
|
||||||
* |
|
||||||
* We separate the parameters for archive and nextWALfile |
|
||||||
* so that we can check the archive exists, even if the |
|
||||||
* WAL file doesn't (yet). |
|
||||||
* |
|
||||||
* This program will be executed once in full for each file |
|
||||||
* requested by the warm standby server. |
|
||||||
* |
|
||||||
* It is designed to cater to a variety of needs, as well as |
|
||||||
* providing a customizable section. |
|
||||||
* |
|
||||||
* Original author: Simon Riggs simon@2ndquadrant.com |
|
||||||
* Current maintainer: Simon Riggs |
|
||||||
*/ |
|
||||||
#include "postgres_fe.h" |
|
||||||
|
|
||||||
#include <ctype.h> |
|
||||||
#include <dirent.h> |
|
||||||
#include <sys/stat.h> |
|
||||||
#include <fcntl.h> |
|
||||||
#include <signal.h> |
|
||||||
#include <sys/time.h> |
|
||||||
|
|
||||||
#include "access/xlog_internal.h" |
|
||||||
#include "pg_getopt.h" |
|
||||||
|
|
||||||
const char *progname; |
|
||||||
|
|
||||||
int WalSegSz = -1; |
|
||||||
|
|
||||||
/* Options and defaults */ |
|
||||||
int sleeptime = 5; /* amount of time to sleep between file checks */ |
|
||||||
int waittime = -1; /* how long we have been waiting, -1 no wait
|
|
||||||
* yet */ |
|
||||||
int maxwaittime = 0; /* how long are we prepared to wait for? */ |
|
||||||
int keepfiles = 0; /* number of WAL files to keep, 0 keep all */ |
|
||||||
int maxretries = 3; /* number of retries on restore command */ |
|
||||||
bool debug = false; /* are we debugging? */ |
|
||||||
bool need_cleanup = false; /* do we need to remove files from
|
|
||||||
* archive? */ |
|
||||||
|
|
||||||
#ifndef WIN32 |
|
||||||
static volatile sig_atomic_t signaled = false; |
|
||||||
#endif |
|
||||||
|
|
||||||
char *archiveLocation; /* where to find the archive? */ |
|
||||||
char *triggerPath; /* where to find the trigger file? */ |
|
||||||
char *xlogFilePath; /* where we are going to restore to */ |
|
||||||
char *nextWALFileName; /* the file we need to get from archive */ |
|
||||||
char *restartWALFileName; /* the file from which we can restart restore */ |
|
||||||
char WALFilePath[MAXPGPATH * 2]; /* the file path including archive */ |
|
||||||
char restoreCommand[MAXPGPATH]; /* run this to restore */ |
|
||||||
char exclusiveCleanupFileName[MAXFNAMELEN]; /* the file we need to get
|
|
||||||
* from archive */ |
|
||||||
|
|
||||||
/*
|
|
||||||
* Two types of failover are supported (smart and fast failover). |
|
||||||
* |
|
||||||
* The content of the trigger file determines the type of failover. If the |
|
||||||
* trigger file contains the word "smart" (or the file is empty), smart |
|
||||||
* failover is chosen: pg_standby acts as cp or ln command itself, on |
|
||||||
* successful completion all the available WAL records will be applied |
|
||||||
* resulting in zero data loss. But, it might take a long time to finish |
|
||||||
* recovery if there's a lot of unapplied WAL. |
|
||||||
* |
|
||||||
* On the other hand, if the trigger file contains the word "fast", the |
|
||||||
* recovery is finished immediately even if unapplied WAL files remain. Any |
|
||||||
* transactions in the unapplied WAL files are lost. |
|
||||||
* |
|
||||||
* An empty trigger file performs smart failover. SIGUSR or SIGINT triggers |
|
||||||
* fast failover. A timeout causes fast failover (smart failover would have |
|
||||||
* the same effect, since if the timeout is reached there is no unapplied WAL). |
|
||||||
*/ |
|
||||||
#define NoFailover 0 |
|
||||||
#define SmartFailover 1 |
|
||||||
#define FastFailover 2 |
|
||||||
|
|
||||||
static int Failover = NoFailover; |
|
||||||
|
|
||||||
#define RESTORE_COMMAND_COPY 0 |
|
||||||
#define RESTORE_COMMAND_LINK 1 |
|
||||||
int restoreCommandType; |
|
||||||
|
|
||||||
#define XLOG_DATA 0 |
|
||||||
#define XLOG_HISTORY 1 |
|
||||||
int nextWALFileType; |
|
||||||
|
|
||||||
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \ |
|
||||||
snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2) |
|
||||||
|
|
||||||
struct stat stat_buf; |
|
||||||
|
|
||||||
static bool SetWALFileNameForCleanup(void); |
|
||||||
static bool SetWALSegSize(void); |
|
||||||
|
|
||||||
|
|
||||||
/* =====================================================================
|
|
||||||
* |
|
||||||
* Customizable section |
|
||||||
* |
|
||||||
* ===================================================================== |
|
||||||
* |
|
||||||
* Currently, this section assumes that the Archive is a locally |
|
||||||
* accessible directory. If you want to make other assumptions, |
|
||||||
* such as using a vendor-specific archive and access API, these |
|
||||||
* routines are the ones you'll need to change. You're |
|
||||||
* encouraged to submit any changes to pgsql-hackers@lists.postgresql.org |
|
||||||
* or personally to the current maintainer. Those changes may be |
|
||||||
* folded in to later versions of this program. |
|
||||||
*/ |
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize allows customized commands into the warm standby program. |
|
||||||
* |
|
||||||
* As an example, and probably the common case, we use either |
|
||||||
* cp/ln commands on *nix, or copy/move command on Windows. |
|
||||||
*/ |
|
||||||
static void |
|
||||||
CustomizableInitialize(void) |
|
||||||
{ |
|
||||||
#ifdef WIN32 |
|
||||||
snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName); |
|
||||||
switch (restoreCommandType) |
|
||||||
{ |
|
||||||
case RESTORE_COMMAND_LINK: |
|
||||||
SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath); |
|
||||||
break; |
|
||||||
case RESTORE_COMMAND_COPY: |
|
||||||
default: |
|
||||||
SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath); |
|
||||||
break; |
|
||||||
} |
|
||||||
#else |
|
||||||
snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName); |
|
||||||
switch (restoreCommandType) |
|
||||||
{ |
|
||||||
case RESTORE_COMMAND_LINK: |
|
||||||
SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath); |
|
||||||
break; |
|
||||||
case RESTORE_COMMAND_COPY: |
|
||||||
default: |
|
||||||
SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath); |
|
||||||
break; |
|
||||||
} |
|
||||||
#endif |
|
||||||
|
|
||||||
/*
|
|
||||||
* This code assumes that archiveLocation is a directory You may wish to |
|
||||||
* add code to check for tape libraries, etc.. So, since it is a |
|
||||||
* directory, we use stat to test if it's accessible |
|
||||||
*/ |
|
||||||
if (stat(archiveLocation, &stat_buf) != 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation); |
|
||||||
fflush(stderr); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* CustomizableNextWALFileReady() |
|
||||||
* |
|
||||||
* Is the requested file ready yet? |
|
||||||
*/ |
|
||||||
static bool |
|
||||||
CustomizableNextWALFileReady(void) |
|
||||||
{ |
|
||||||
if (stat(WALFilePath, &stat_buf) == 0) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* If we've not seen any WAL segments, we don't know the WAL segment |
|
||||||
* size, which we need. If it looks like a WAL segment, determine size |
|
||||||
* of segments for the cluster. |
|
||||||
*/ |
|
||||||
if (WalSegSz == -1 && IsXLogFileName(nextWALFileName)) |
|
||||||
{ |
|
||||||
if (SetWALSegSize()) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Successfully determined WAL segment size. Can compute |
|
||||||
* cleanup cutoff now. |
|
||||||
*/ |
|
||||||
need_cleanup = SetWALFileNameForCleanup(); |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, |
|
||||||
_("WAL segment size: %d \n"), WalSegSz); |
|
||||||
fprintf(stderr, "Keep archive history: "); |
|
||||||
|
|
||||||
if (need_cleanup) |
|
||||||
fprintf(stderr, "%s and later\n", |
|
||||||
exclusiveCleanupFileName); |
|
||||||
else |
|
||||||
fprintf(stderr, "no cleanup required\n"); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Return only if it's the right size already. |
|
||||||
*/ |
|
||||||
if (WalSegSz > 0 && stat_buf.st_size == WalSegSz) |
|
||||||
{ |
|
||||||
#ifdef WIN32 |
|
||||||
|
|
||||||
/*
|
|
||||||
* Windows 'cp' sets the final file size before the copy is |
|
||||||
* complete, and not yet ready to be opened by pg_standby. So we |
|
||||||
* wait for sleeptime secs before attempting to restore. If that |
|
||||||
* is not enough, we will rely on the retry/holdoff mechanism. |
|
||||||
* GNUWin32's cp does not have this problem. |
|
||||||
*/ |
|
||||||
pg_usleep(sleeptime * 1000000L); |
|
||||||
#endif |
|
||||||
nextWALFileType = XLOG_DATA; |
|
||||||
return true; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* If still too small, wait until it is the correct size |
|
||||||
*/ |
|
||||||
if (WalSegSz > 0 && stat_buf.st_size > WalSegSz) |
|
||||||
{ |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "file size greater than expected\n"); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
exit(3); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
return false; |
|
||||||
} |
|
||||||
|
|
||||||
static void |
|
||||||
CustomizableCleanupPriorWALFiles(void) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Work out name of prior file from current filename |
|
||||||
*/ |
|
||||||
if (nextWALFileType == XLOG_DATA) |
|
||||||
{ |
|
||||||
int rc; |
|
||||||
DIR *xldir; |
|
||||||
struct dirent *xlde; |
|
||||||
|
|
||||||
/*
|
|
||||||
* Assume it's OK to keep failing. The failure situation may change |
|
||||||
* over time, so we'd rather keep going on the main processing than |
|
||||||
* fail because we couldn't clean up yet. |
|
||||||
*/ |
|
||||||
if ((xldir = opendir(archiveLocation)) != NULL) |
|
||||||
{ |
|
||||||
while (errno = 0, (xlde = readdir(xldir)) != NULL) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* We ignore the timeline part of the XLOG segment identifiers |
|
||||||
* in deciding whether a segment is still needed. This |
|
||||||
* ensures that we won't prematurely remove a segment from a |
|
||||||
* parent timeline. We could probably be a little more |
|
||||||
* proactive about removing segments of non-parent timelines, |
|
||||||
* but that would be a whole lot more complicated. |
|
||||||
* |
|
||||||
* We use the alphanumeric sorting property of the filenames |
|
||||||
* to decide which ones are earlier than the |
|
||||||
* exclusiveCleanupFileName file. Note that this means files |
|
||||||
* are not removed in the order they were originally written, |
|
||||||
* in case this worries you. |
|
||||||
*/ |
|
||||||
if (IsXLogFileName(xlde->d_name) && |
|
||||||
strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0) |
|
||||||
{ |
|
||||||
#ifdef WIN32 |
|
||||||
snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name); |
|
||||||
#else |
|
||||||
snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name); |
|
||||||
#endif |
|
||||||
|
|
||||||
if (debug) |
|
||||||
fprintf(stderr, "\nremoving file \"%s\"", WALFilePath); |
|
||||||
|
|
||||||
rc = unlink(WALFilePath); |
|
||||||
if (rc != 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n", |
|
||||||
progname, WALFilePath, strerror(errno)); |
|
||||||
break; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
if (errno) |
|
||||||
fprintf(stderr, "%s: could not read archive location \"%s\": %s\n", |
|
||||||
progname, archiveLocation, strerror(errno)); |
|
||||||
if (debug) |
|
||||||
fprintf(stderr, "\n"); |
|
||||||
} |
|
||||||
else |
|
||||||
fprintf(stderr, "%s: could not open archive location \"%s\": %s\n", |
|
||||||
progname, archiveLocation, strerror(errno)); |
|
||||||
|
|
||||||
if (closedir(xldir)) |
|
||||||
fprintf(stderr, "%s: could not close archive location \"%s\": %s\n", |
|
||||||
progname, archiveLocation, strerror(errno)); |
|
||||||
|
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/* =====================================================================
|
|
||||||
* End of Customizable section |
|
||||||
* ===================================================================== |
|
||||||
*/ |
|
||||||
|
|
||||||
/*
|
|
||||||
* SetWALFileNameForCleanup() |
|
||||||
* |
|
||||||
* Set the earliest WAL filename that we want to keep on the archive |
|
||||||
* and decide whether we need_cleanup |
|
||||||
*/ |
|
||||||
static bool |
|
||||||
SetWALFileNameForCleanup(void) |
|
||||||
{ |
|
||||||
uint32 tli = 1, |
|
||||||
log = 0, |
|
||||||
seg = 0; |
|
||||||
uint32 log_diff = 0, |
|
||||||
seg_diff = 0; |
|
||||||
bool cleanup = false; |
|
||||||
int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz); |
|
||||||
|
|
||||||
if (restartWALFileName) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Don't do cleanup if the restartWALFileName provided is later than |
|
||||||
* the xlog file requested. This is an error and we must not remove |
|
||||||
* these files from archive. This shouldn't happen, but better safe |
|
||||||
* than sorry. |
|
||||||
*/ |
|
||||||
if (strcmp(restartWALFileName, nextWALFileName) > 0) |
|
||||||
return false; |
|
||||||
|
|
||||||
strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName)); |
|
||||||
return true; |
|
||||||
} |
|
||||||
|
|
||||||
if (keepfiles > 0) |
|
||||||
{ |
|
||||||
sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg); |
|
||||||
if (tli > 0 && seg > 0) |
|
||||||
{ |
|
||||||
log_diff = keepfiles / max_segments_per_logfile; |
|
||||||
seg_diff = keepfiles % max_segments_per_logfile; |
|
||||||
if (seg_diff > seg) |
|
||||||
{ |
|
||||||
log_diff++; |
|
||||||
seg = max_segments_per_logfile - (seg_diff - seg); |
|
||||||
} |
|
||||||
else |
|
||||||
seg -= seg_diff; |
|
||||||
|
|
||||||
if (log >= log_diff) |
|
||||||
{ |
|
||||||
log -= log_diff; |
|
||||||
cleanup = true; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
log = 0; |
|
||||||
seg = 0; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
XLogFileNameById(exclusiveCleanupFileName, tli, log, seg); |
|
||||||
|
|
||||||
return cleanup; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Try to set the wal segment size from the WAL file specified by WALFilePath. |
|
||||||
* |
|
||||||
* Return true if size could be determined, false otherwise. |
|
||||||
*/ |
|
||||||
static bool |
|
||||||
SetWALSegSize(void) |
|
||||||
{ |
|
||||||
bool ret_val = false; |
|
||||||
int fd; |
|
||||||
PGAlignedXLogBlock buf; |
|
||||||
|
|
||||||
Assert(WalSegSz == -1); |
|
||||||
|
|
||||||
if ((fd = open(WALFilePath, O_RDWR, 0)) < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n", |
|
||||||
progname, WALFilePath, strerror(errno)); |
|
||||||
return false; |
|
||||||
} |
|
||||||
|
|
||||||
errno = 0; |
|
||||||
if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ) |
|
||||||
{ |
|
||||||
XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data; |
|
||||||
|
|
||||||
WalSegSz = longhdr->xlp_seg_size; |
|
||||||
|
|
||||||
if (IsValidWalSegSize(WalSegSz)) |
|
||||||
{ |
|
||||||
/* successfully retrieved WAL segment size */ |
|
||||||
ret_val = true; |
|
||||||
} |
|
||||||
else |
|
||||||
fprintf(stderr, |
|
||||||
"%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n", |
|
||||||
progname, WalSegSz); |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Don't complain loudly, this is to be expected for segments being |
|
||||||
* created. |
|
||||||
*/ |
|
||||||
if (errno != 0) |
|
||||||
{ |
|
||||||
if (debug) |
|
||||||
fprintf(stderr, "could not read file \"%s\": %s\n", |
|
||||||
WALFilePath, strerror(errno)); |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
if (debug) |
|
||||||
fprintf(stderr, "not enough data in file \"%s\"\n", |
|
||||||
WALFilePath); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
fflush(stderr); |
|
||||||
|
|
||||||
close(fd); |
|
||||||
return ret_val; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* CheckForExternalTrigger() |
|
||||||
* |
|
||||||
* Is there a trigger file? Sets global 'Failover' variable to indicate |
|
||||||
* what kind of a trigger file it was. A "fast" trigger file is turned |
|
||||||
* into a "smart" file as a side-effect. |
|
||||||
*/ |
|
||||||
static void |
|
||||||
CheckForExternalTrigger(void) |
|
||||||
{ |
|
||||||
char buf[32]; |
|
||||||
int fd; |
|
||||||
int len; |
|
||||||
|
|
||||||
/*
|
|
||||||
* Look for a trigger file, if that option has been selected |
|
||||||
* |
|
||||||
* We use stat() here because triggerPath is always a file rather than |
|
||||||
* potentially being in an archive |
|
||||||
*/ |
|
||||||
if (!triggerPath || stat(triggerPath, &stat_buf) != 0) |
|
||||||
return; |
|
||||||
|
|
||||||
/*
|
|
||||||
* An empty trigger file performs smart failover. There's a little race |
|
||||||
* condition here: if the writer of the trigger file has just created the |
|
||||||
* file, but not yet written anything to it, we'll treat that as smart |
|
||||||
* shutdown even if the other process was just about to write "fast" to |
|
||||||
* it. But that's fine: we'll restore one more WAL file, and when we're |
|
||||||
* invoked next time, we'll see the word "fast" and fail over immediately. |
|
||||||
*/ |
|
||||||
if (stat_buf.st_size == 0) |
|
||||||
{ |
|
||||||
Failover = SmartFailover; |
|
||||||
fprintf(stderr, "trigger file found: smart failover\n"); |
|
||||||
fflush(stderr); |
|
||||||
return; |
|
||||||
} |
|
||||||
|
|
||||||
if ((fd = open(triggerPath, O_RDWR, 0)) < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "WARNING: could not open \"%s\": %s\n", |
|
||||||
triggerPath, strerror(errno)); |
|
||||||
fflush(stderr); |
|
||||||
return; |
|
||||||
} |
|
||||||
|
|
||||||
if ((len = read(fd, buf, sizeof(buf) - 1)) < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "WARNING: could not read \"%s\": %s\n", |
|
||||||
triggerPath, strerror(errno)); |
|
||||||
fflush(stderr); |
|
||||||
close(fd); |
|
||||||
return; |
|
||||||
} |
|
||||||
buf[len] = '\0'; |
|
||||||
|
|
||||||
if (strncmp(buf, "smart", 5) == 0) |
|
||||||
{ |
|
||||||
Failover = SmartFailover; |
|
||||||
fprintf(stderr, "trigger file found: smart failover\n"); |
|
||||||
fflush(stderr); |
|
||||||
close(fd); |
|
||||||
return; |
|
||||||
} |
|
||||||
|
|
||||||
if (strncmp(buf, "fast", 4) == 0) |
|
||||||
{ |
|
||||||
Failover = FastFailover; |
|
||||||
|
|
||||||
fprintf(stderr, "trigger file found: fast failover\n"); |
|
||||||
fflush(stderr); |
|
||||||
|
|
||||||
/*
|
|
||||||
* Turn it into a "smart" trigger by truncating the file. Otherwise if |
|
||||||
* the server asks us again to restore a segment that was restored |
|
||||||
* already, we would return "not found" and upset the server. |
|
||||||
*/ |
|
||||||
if (ftruncate(fd, 0) < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "WARNING: could not read \"%s\": %s\n", |
|
||||||
triggerPath, strerror(errno)); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
close(fd); |
|
||||||
|
|
||||||
return; |
|
||||||
} |
|
||||||
close(fd); |
|
||||||
|
|
||||||
fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* RestoreWALFileForRecovery() |
|
||||||
* |
|
||||||
* Perform the action required to restore the file from archive |
|
||||||
*/ |
|
||||||
static bool |
|
||||||
RestoreWALFileForRecovery(void) |
|
||||||
{ |
|
||||||
int rc = 0; |
|
||||||
int numretries = 0; |
|
||||||
|
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "running restore: "); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
|
|
||||||
while (numretries <= maxretries) |
|
||||||
{ |
|
||||||
rc = system(restoreCommand); |
|
||||||
if (rc == 0) |
|
||||||
{ |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "OK\n"); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
return true; |
|
||||||
} |
|
||||||
pg_usleep(numretries++ * sleeptime * 1000000L); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Allow caller to add additional info |
|
||||||
*/ |
|
||||||
if (debug) |
|
||||||
fprintf(stderr, "not restored\n"); |
|
||||||
return false; |
|
||||||
} |
|
||||||
|
|
||||||
static void |
|
||||||
usage(void) |
|
||||||
{ |
|
||||||
printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname); |
|
||||||
printf("Usage:\n"); |
|
||||||
printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname); |
|
||||||
printf("\nOptions:\n"); |
|
||||||
printf(" -c copy file from archive (default)\n"); |
|
||||||
printf(" -d generate lots of debugging output (testing only)\n"); |
|
||||||
printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n" |
|
||||||
" (0 keeps all)\n"); |
|
||||||
printf(" -l does nothing; use of link is now deprecated\n"); |
|
||||||
printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n" |
|
||||||
" (default=3)\n"); |
|
||||||
printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n" |
|
||||||
" default=5)\n"); |
|
||||||
printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n"); |
|
||||||
printf(" -V, --version output version information, then exit\n"); |
|
||||||
printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n"); |
|
||||||
printf(" -?, --help show this help, then exit\n"); |
|
||||||
printf("\n" |
|
||||||
"Main intended use as restore_command in postgresql.conf:\n" |
|
||||||
" restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n" |
|
||||||
"e.g.\n" |
|
||||||
" restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n"); |
|
||||||
printf("\nReport bugs to <%s>.\n", PACKAGE_BUGREPORT); |
|
||||||
printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL); |
|
||||||
} |
|
||||||
|
|
||||||
#ifndef WIN32 |
|
||||||
static void |
|
||||||
sighandler(int sig) |
|
||||||
{ |
|
||||||
signaled = true; |
|
||||||
} |
|
||||||
|
|
||||||
/* We don't want SIGQUIT to core dump */ |
|
||||||
static void |
|
||||||
sigquit_handler(int sig) |
|
||||||
{ |
|
||||||
pqsignal(SIGINT, SIG_DFL); |
|
||||||
kill(getpid(), SIGINT); |
|
||||||
} |
|
||||||
#endif |
|
||||||
|
|
||||||
/*------------ MAIN ----------------------------------------*/ |
|
||||||
int |
|
||||||
main(int argc, char **argv) |
|
||||||
{ |
|
||||||
int c; |
|
||||||
|
|
||||||
progname = get_progname(argv[0]); |
|
||||||
|
|
||||||
if (argc > 1) |
|
||||||
{ |
|
||||||
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) |
|
||||||
{ |
|
||||||
usage(); |
|
||||||
exit(0); |
|
||||||
} |
|
||||||
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) |
|
||||||
{ |
|
||||||
puts("pg_standby (PostgreSQL) " PG_VERSION); |
|
||||||
exit(0); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
#ifndef WIN32 |
|
||||||
|
|
||||||
/*
|
|
||||||
* You can send SIGUSR1 to trigger failover. |
|
||||||
* |
|
||||||
* Postmaster uses SIGQUIT to request immediate shutdown. The default |
|
||||||
* action is to core dump, but we don't want that, so trap it and commit |
|
||||||
* suicide without core dump. |
|
||||||
* |
|
||||||
* We used to use SIGINT and SIGQUIT to trigger failover, but that turned |
|
||||||
* out to be a bad idea because postmaster uses SIGQUIT to request |
|
||||||
* immediate shutdown. We still trap SIGINT, but that may change in a |
|
||||||
* future release. |
|
||||||
* |
|
||||||
* There's no way to trigger failover via signal on Windows. |
|
||||||
*/ |
|
||||||
(void) pqsignal(SIGUSR1, sighandler); |
|
||||||
(void) pqsignal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */ |
|
||||||
(void) pqsignal(SIGQUIT, sigquit_handler); |
|
||||||
#endif |
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) |
|
||||||
{ |
|
||||||
switch (c) |
|
||||||
{ |
|
||||||
case 'c': /* Use copy */ |
|
||||||
restoreCommandType = RESTORE_COMMAND_COPY; |
|
||||||
break; |
|
||||||
case 'd': /* Debug mode */ |
|
||||||
debug = true; |
|
||||||
break; |
|
||||||
case 'k': /* keepfiles */ |
|
||||||
keepfiles = atoi(optarg); |
|
||||||
if (keepfiles < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
break; |
|
||||||
case 'l': /* Use link */ |
|
||||||
|
|
||||||
/*
|
|
||||||
* Link feature disabled, possibly permanently. Linking causes |
|
||||||
* a problem after recovery ends that is not currently |
|
||||||
* resolved by PostgreSQL. 25 Jun 2009 |
|
||||||
*/ |
|
||||||
#ifdef NOT_USED |
|
||||||
restoreCommandType = RESTORE_COMMAND_LINK; |
|
||||||
#endif |
|
||||||
break; |
|
||||||
case 'r': /* Retries */ |
|
||||||
maxretries = atoi(optarg); |
|
||||||
if (maxretries < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
break; |
|
||||||
case 's': /* Sleep time */ |
|
||||||
sleeptime = atoi(optarg); |
|
||||||
if (sleeptime <= 0 || sleeptime > 60) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
break; |
|
||||||
case 't': /* Trigger file */ |
|
||||||
triggerPath = pg_strdup(optarg); |
|
||||||
break; |
|
||||||
case 'w': /* Max wait time */ |
|
||||||
maxwaittime = atoi(optarg); |
|
||||||
if (maxwaittime < 0) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
break; |
|
||||||
default: |
|
||||||
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); |
|
||||||
exit(2); |
|
||||||
break; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Parameter checking - after checking to see if trigger file present |
|
||||||
*/ |
|
||||||
if (argc == 1) |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: not enough command-line arguments\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* We will go to the archiveLocation to get nextWALFileName. |
|
||||||
* nextWALFileName may not exist yet, which would not be an error, so we |
|
||||||
* separate the archiveLocation and nextWALFileName so we can check |
|
||||||
* separately whether archiveLocation exists, if not that is an error |
|
||||||
*/ |
|
||||||
if (optind < argc) |
|
||||||
{ |
|
||||||
archiveLocation = argv[optind]; |
|
||||||
optind++; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: must specify archive location\n", progname); |
|
||||||
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
|
|
||||||
if (optind < argc) |
|
||||||
{ |
|
||||||
nextWALFileName = argv[optind]; |
|
||||||
optind++; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname); |
|
||||||
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
|
|
||||||
if (optind < argc) |
|
||||||
{ |
|
||||||
xlogFilePath = argv[optind]; |
|
||||||
optind++; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname); |
|
||||||
fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); |
|
||||||
exit(2); |
|
||||||
} |
|
||||||
|
|
||||||
if (optind < argc) |
|
||||||
{ |
|
||||||
restartWALFileName = argv[optind]; |
|
||||||
optind++; |
|
||||||
} |
|
||||||
|
|
||||||
CustomizableInitialize(); |
|
||||||
|
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>"); |
|
||||||
fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName); |
|
||||||
fprintf(stderr, "WAL file path: %s\n", WALFilePath); |
|
||||||
fprintf(stderr, "Restoring to: %s\n", xlogFilePath); |
|
||||||
fprintf(stderr, "Sleep interval: %d second%s\n", |
|
||||||
sleeptime, (sleeptime > 1 ? "s" : " ")); |
|
||||||
fprintf(stderr, "Max wait interval: %d %s\n", |
|
||||||
maxwaittime, (maxwaittime > 0 ? "seconds" : "forever")); |
|
||||||
fprintf(stderr, "Command for restore: %s\n", restoreCommand); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for initial history file: always the first file to be requested |
|
||||||
* It's OK if the file isn't there - all other files need to wait |
|
||||||
*/ |
|
||||||
if (IsTLHistoryFileName(nextWALFileName)) |
|
||||||
{ |
|
||||||
nextWALFileType = XLOG_HISTORY; |
|
||||||
if (RestoreWALFileForRecovery()) |
|
||||||
exit(0); |
|
||||||
else |
|
||||||
{ |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "history file not found\n"); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
exit(1); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Main wait loop |
|
||||||
*/ |
|
||||||
for (;;) |
|
||||||
{ |
|
||||||
/* Check for trigger file or signal first */ |
|
||||||
CheckForExternalTrigger(); |
|
||||||
#ifndef WIN32 |
|
||||||
if (signaled) |
|
||||||
{ |
|
||||||
Failover = FastFailover; |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "signaled to exit: fast failover\n"); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
} |
|
||||||
#endif |
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for fast failover immediately, before checking if the |
|
||||||
* requested WAL file is available |
|
||||||
*/ |
|
||||||
if (Failover == FastFailover) |
|
||||||
exit(1); |
|
||||||
|
|
||||||
if (CustomizableNextWALFileReady()) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Once we have restored this file successfully we can remove some |
|
||||||
* prior WAL files. If this restore fails we mustn't remove any |
|
||||||
* file because some of them will be requested again immediately |
|
||||||
* after the failed restore, or when we restart recovery. |
|
||||||
*/ |
|
||||||
if (RestoreWALFileForRecovery()) |
|
||||||
{ |
|
||||||
if (need_cleanup) |
|
||||||
CustomizableCleanupPriorWALFiles(); |
|
||||||
|
|
||||||
exit(0); |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
/* Something went wrong in copying the file */ |
|
||||||
exit(1); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/* Check for smart failover if the next WAL file was not available */ |
|
||||||
if (Failover == SmartFailover) |
|
||||||
exit(1); |
|
||||||
|
|
||||||
if (sleeptime <= 60) |
|
||||||
pg_usleep(sleeptime * 1000000L); |
|
||||||
|
|
||||||
waittime += sleeptime; |
|
||||||
if (waittime >= maxwaittime && maxwaittime > 0) |
|
||||||
{ |
|
||||||
Failover = FastFailover; |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "Timed out after %d seconds: fast failover\n", |
|
||||||
waittime); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
} |
|
||||||
if (debug) |
|
||||||
{ |
|
||||||
fprintf(stderr, "WAL file not present yet."); |
|
||||||
if (triggerPath) |
|
||||||
fprintf(stderr, " Checking for trigger file..."); |
|
||||||
fprintf(stderr, "\n"); |
|
||||||
fflush(stderr); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,394 +0,0 @@ |
|||||||
<!-- doc/src/sgml/pgstandby.sgml --> |
|
||||||
|
|
||||||
<refentry id="pgstandby"> |
|
||||||
<indexterm zone="pgstandby"> |
|
||||||
<primary>pg_standby</primary> |
|
||||||
</indexterm> |
|
||||||
|
|
||||||
<refmeta> |
|
||||||
<refentrytitle><application>pg_standby</application></refentrytitle> |
|
||||||
<manvolnum>1</manvolnum> |
|
||||||
<refmiscinfo>Application</refmiscinfo> |
|
||||||
</refmeta> |
|
||||||
|
|
||||||
<refnamediv> |
|
||||||
<refname>pg_standby</refname> |
|
||||||
<refpurpose>supports the creation of a <productname>PostgreSQL</productname> warm standby server</refpurpose> |
|
||||||
</refnamediv> |
|
||||||
|
|
||||||
<refsynopsisdiv> |
|
||||||
<cmdsynopsis> |
|
||||||
<command>pg_standby</command> |
|
||||||
<arg rep="repeat"><replaceable>option</replaceable></arg> |
|
||||||
<arg choice="plain"><replaceable>archivelocation</replaceable></arg> |
|
||||||
<arg choice="plain"><replaceable>nextwalfile</replaceable></arg> |
|
||||||
<arg choice="plain"><replaceable>walfilepath</replaceable></arg> |
|
||||||
<arg choice="opt"><replaceable>restartwalfile</replaceable></arg> |
|
||||||
</cmdsynopsis> |
|
||||||
</refsynopsisdiv> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>Description</title> |
|
||||||
|
|
||||||
<para> |
|
||||||
<application>pg_standby</application> supports creation of a <quote>warm standby</quote> |
|
||||||
database server. It is designed to be a production-ready program, as well |
|
||||||
as a customizable template should you require specific modifications. |
|
||||||
</para> |
|
||||||
|
|
||||||
<para> |
|
||||||
<application>pg_standby</application> is designed to be a waiting |
|
||||||
<varname>restore_command</varname>, which is needed to turn a standard |
|
||||||
archive recovery into a warm standby operation. Other |
|
||||||
configuration is required as well, all of which is described in the main |
|
||||||
server manual (see <xref linkend="warm-standby"/>). |
|
||||||
</para> |
|
||||||
|
|
||||||
<para> |
|
||||||
To configure a standby |
|
||||||
server to use <application>pg_standby</application>, put this into its |
|
||||||
<filename>postgresql.conf</filename> configuration file: |
|
||||||
<programlisting> |
|
||||||
restore_command = 'pg_standby <replaceable>archiveDir</replaceable> %f %p %r' |
|
||||||
</programlisting> |
|
||||||
where <replaceable>archiveDir</replaceable> is the directory from which WAL segment |
|
||||||
files should be restored. |
|
||||||
</para> |
|
||||||
<para> |
|
||||||
If <replaceable>restartwalfile</replaceable> is specified, normally by using the |
|
||||||
<literal>%r</literal> macro, then all WAL files logically preceding this |
|
||||||
file will be removed from <replaceable>archivelocation</replaceable>. This minimizes |
|
||||||
the number of files that need to be retained, while preserving |
|
||||||
crash-restart capability. Use of this parameter is appropriate if the |
|
||||||
<replaceable>archivelocation</replaceable> is a transient staging area for this |
|
||||||
particular standby server, but <emphasis>not</emphasis> when the |
|
||||||
<replaceable>archivelocation</replaceable> is intended as a long-term WAL archive area. |
|
||||||
</para> |
|
||||||
<para> |
|
||||||
<application>pg_standby</application> assumes that |
|
||||||
<replaceable>archivelocation</replaceable> is a directory readable by the |
|
||||||
server-owning user. If <replaceable>restartwalfile</replaceable> (or <literal>-k</literal>) |
|
||||||
is specified, |
|
||||||
the <replaceable>archivelocation</replaceable> directory must be writable too. |
|
||||||
</para> |
|
||||||
<para> |
|
||||||
There are two ways to fail over to a <quote>warm standby</quote> database server |
|
||||||
when the primary server fails: |
|
||||||
|
|
||||||
<variablelist> |
|
||||||
<varlistentry> |
|
||||||
<term>Smart Failover</term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
In smart failover, the server is brought up after applying all WAL |
|
||||||
files available in the archive. This results in zero data loss, even if |
|
||||||
the standby server has fallen behind, but if there is a lot of |
|
||||||
unapplied WAL it can be a long time before the standby server becomes |
|
||||||
ready. To trigger a smart failover, create a trigger file containing |
|
||||||
the word <literal>smart</literal>, or just create it and leave it empty. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
<varlistentry> |
|
||||||
<term>Fast Failover</term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
In fast failover, the server is brought up immediately. Any WAL files |
|
||||||
in the archive that have not yet been applied will be ignored, and |
|
||||||
all transactions in those files are lost. To trigger a fast failover, |
|
||||||
create a trigger file and write the word <literal>fast</literal> into it. |
|
||||||
<application>pg_standby</application> can also be configured to execute a fast |
|
||||||
failover automatically if no new WAL file appears within a defined |
|
||||||
interval. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
</variablelist> |
|
||||||
</para> |
|
||||||
|
|
||||||
</refsect1> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>Options</title> |
|
||||||
|
|
||||||
<para> |
|
||||||
<application>pg_standby</application> accepts the following command-line arguments: |
|
||||||
|
|
||||||
<variablelist> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-c</option></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Use <literal>cp</literal> or <literal>copy</literal> command to restore WAL files |
|
||||||
from archive. This is the only supported behavior, so this option has no effect. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-d</option></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Print lots of debug logging output on <filename>stderr</filename>. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-k</option> <replaceable>numfiles</replaceable></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Remove files from <replaceable>archivelocation</replaceable> so that |
|
||||||
no more than this many WAL files before the current one are kept in the |
|
||||||
archive. Zero (the default) means not to remove any files from |
|
||||||
<replaceable>archivelocation</replaceable>. |
|
||||||
This parameter will be silently ignored if |
|
||||||
<replaceable>restartwalfile</replaceable> is specified, since that |
|
||||||
specification method is more accurate in determining the correct |
|
||||||
archive cut-off point. |
|
||||||
Use of this parameter is <emphasis>deprecated</emphasis> as of |
|
||||||
<productname>PostgreSQL</productname> 8.3; it is safer and more efficient to |
|
||||||
specify a <replaceable>restartwalfile</replaceable> parameter. A too |
|
||||||
small setting could result in removal of files that are still needed |
|
||||||
for a restart of the standby server, while a too large setting wastes |
|
||||||
archive space. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-r</option> <replaceable>maxretries</replaceable></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Set the maximum number of times to retry the copy command if |
|
||||||
it fails (default 3). After each failure, we wait for |
|
||||||
<replaceable>sleeptime</replaceable> * <replaceable>num_retries</replaceable> |
|
||||||
so that the wait time increases progressively. So by default, |
|
||||||
we will wait 5 seconds, 10 seconds, then 15 seconds before reporting |
|
||||||
the failure back to the standby server. This will be |
|
||||||
interpreted as end of recovery and the standby will come |
|
||||||
up fully as a result. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-s</option> <replaceable>sleeptime</replaceable></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Set the number of seconds (up to 60, default 5) to sleep between |
|
||||||
tests to see if the WAL file to be restored is available in |
|
||||||
the archive yet. The default setting is not necessarily |
|
||||||
recommended; consult <xref linkend="warm-standby"/> for discussion. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-t</option> <replaceable>triggerfile</replaceable></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Specify a trigger file whose presence should cause failover. |
|
||||||
It is recommended that you use a structured file name to |
|
||||||
avoid confusion as to which server is being triggered |
|
||||||
when multiple servers exist on the same system; for example |
|
||||||
<filename>/tmp/pgsql.trigger.5432</filename>. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-V</option></term> |
|
||||||
<term><option>--version</option></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Print the <application>pg_standby</application> version and exit. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-w</option> <replaceable>maxwaittime</replaceable></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Set the maximum number of seconds to wait for the next WAL file, |
|
||||||
after which a fast failover will be performed. |
|
||||||
A setting of zero (the default) means wait forever. |
|
||||||
The default setting is not necessarily recommended; |
|
||||||
consult <xref linkend="warm-standby"/> for discussion. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
|
|
||||||
<varlistentry> |
|
||||||
<term><option>-?</option></term> |
|
||||||
<term><option>--help</option></term> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
Show help about <application>pg_standby</application> command line |
|
||||||
arguments, and exit. |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</varlistentry> |
|
||||||
</variablelist> |
|
||||||
</para> |
|
||||||
|
|
||||||
</refsect1> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>Notes</title> |
|
||||||
|
|
||||||
<para> |
|
||||||
<application>pg_standby</application> is designed to work with |
|
||||||
<productname>PostgreSQL</productname> 8.2 and later. |
|
||||||
</para> |
|
||||||
<para> |
|
||||||
<productname>PostgreSQL</productname> 8.3 provides the <literal>%r</literal> macro, |
|
||||||
which is designed to let <application>pg_standby</application> know the |
|
||||||
last file it needs to keep. With <productname>PostgreSQL</productname> 8.2, the |
|
||||||
<literal>-k</literal> option must be used if archive cleanup is |
|
||||||
required. This option remains available in 8.3, but its use is deprecated. |
|
||||||
</para> |
|
||||||
<para> |
|
||||||
<productname>PostgreSQL</productname> 8.4 provides the |
|
||||||
<varname>recovery_end_command</varname> option. Without this option |
|
||||||
a leftover trigger file can be hazardous. |
|
||||||
</para> |
|
||||||
|
|
||||||
<para> |
|
||||||
<application>pg_standby</application> is written in C and has |
|
||||||
easy-to-modify source code, with specifically designated sections to modify |
|
||||||
for your own needs. |
|
||||||
</para> |
|
||||||
</refsect1> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>Examples</title> |
|
||||||
|
|
||||||
<para>On Linux or Unix systems, you might use: |
|
||||||
|
|
||||||
<programlisting> |
|
||||||
archive_command = 'cp %p .../archive/%f' |
|
||||||
|
|
||||||
restore_command = 'pg_standby -d -s 2 -t /tmp/pgsql.trigger.5442 .../archive %f %p %r 2>>standby.log' |
|
||||||
|
|
||||||
recovery_end_command = 'rm -f /tmp/pgsql.trigger.5442' |
|
||||||
</programlisting> |
|
||||||
where the archive directory is physically located on the standby server, |
|
||||||
so that the <varname>archive_command</varname> is accessing it across NFS, |
|
||||||
but the files are local to the standby (enabling use of <literal>ln</literal>). |
|
||||||
This will: |
|
||||||
<itemizedlist> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
produce debugging output in <filename>standby.log</filename> |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
sleep for 2 seconds between checks for next WAL file availability |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
stop waiting only when a trigger file called |
|
||||||
<filename>/tmp/pgsql.trigger.5442</filename> appears, |
|
||||||
and perform failover according to its content |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
remove the trigger file when recovery ends |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
remove no-longer-needed files from the archive directory |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</itemizedlist> |
|
||||||
</para> |
|
||||||
|
|
||||||
<para>On Windows, you might use: |
|
||||||
|
|
||||||
<programlisting> |
|
||||||
archive_command = 'copy %p ...\\archive\\%f' |
|
||||||
|
|
||||||
restore_command = 'pg_standby -d -s 5 -t C:\pgsql.trigger.5442 ...\archive %f %p %r 2>>standby.log' |
|
||||||
|
|
||||||
recovery_end_command = 'del C:\pgsql.trigger.5442' |
|
||||||
</programlisting> |
|
||||||
Note that backslashes need to be doubled in the |
|
||||||
<varname>archive_command</varname>, but <emphasis>not</emphasis> in the |
|
||||||
<varname>restore_command</varname> or <varname>recovery_end_command</varname>. |
|
||||||
This will: |
|
||||||
<itemizedlist> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
use the <literal>copy</literal> command to restore WAL files from archive |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
produce debugging output in <filename>standby.log</filename> |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
sleep for 5 seconds between checks for next WAL file availability |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
stop waiting only when a trigger file called |
|
||||||
<filename>C:\pgsql.trigger.5442</filename> appears, |
|
||||||
and perform failover according to its content |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
remove the trigger file when recovery ends |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
<listitem> |
|
||||||
<para> |
|
||||||
remove no-longer-needed files from the archive directory |
|
||||||
</para> |
|
||||||
</listitem> |
|
||||||
</itemizedlist> |
|
||||||
</para> |
|
||||||
|
|
||||||
<para> |
|
||||||
The <literal>copy</literal> command on Windows sets the final file size |
|
||||||
before the file is completely copied, which would ordinarily confuse |
|
||||||
<application>pg_standby</application>. Therefore |
|
||||||
<application>pg_standby</application> waits <replaceable>sleeptime</replaceable> |
|
||||||
seconds once it sees the proper file size. GNUWin32's <literal>cp</literal> |
|
||||||
sets the file size only after the file copy is complete. |
|
||||||
</para> |
|
||||||
|
|
||||||
<para> |
|
||||||
Since the Windows example uses <literal>copy</literal> at both ends, either |
|
||||||
or both servers might be accessing the archive directory across the |
|
||||||
network. |
|
||||||
</para> |
|
||||||
|
|
||||||
</refsect1> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>Author</title> |
|
||||||
|
|
||||||
<para> |
|
||||||
Simon Riggs <email>simon@2ndquadrant.com</email> |
|
||||||
</para> |
|
||||||
</refsect1> |
|
||||||
|
|
||||||
<refsect1> |
|
||||||
<title>See Also</title> |
|
||||||
|
|
||||||
<simplelist type="inline"> |
|
||||||
<member><xref linkend="pgarchivecleanup"/></member> |
|
||||||
</simplelist> |
|
||||||
</refsect1> |
|
||||||
</refentry> |
|
Loading…
Reference in new issue