mirror of https://github.com/postgres/postgres
This commit reverts 3c5db1d6b0, and subsequent improvements and fixes including 8036d73ae3, 867d396ccd, 3ac3ec580c, 0868d7ae70, 85b98b8d5a, 2520226c95, 014f9f34d2, e658038772, e1555645d7, 5035172e4a, 6cfebfe88b, 73da6b8d1b, and e546989a26.

The reason for reverting is a set of remaining issues. Most notably, the stored procedure appears to need more effort than the utility statement to turn the backend into a "snapshot-less" state. This makes the approach of using stored procedures questionable.

Catversion is bumped.

Discussion: https://postgr.es/m/Zyhj2anOPRKtb0xW%40paquier.xyz
parent
3293b718a0
commit
3a7ae6b3d9
@ -1,337 +0,0 @@ |
|||||||
/*-------------------------------------------------------------------------
|
|
||||||
* |
|
||||||
* xlogwait.c |
|
||||||
* Implements waiting for the given replay LSN, which is used in |
|
||||||
* CALL pg_wal_replay_wait(target_lsn pg_lsn, |
|
||||||
* timeout float8, no_error bool). |
|
||||||
* |
|
||||||
* Copyright (c) 2024, PostgreSQL Global Development Group |
|
||||||
* |
|
||||||
* IDENTIFICATION |
|
||||||
* src/backend/access/transam/xlogwait.c |
|
||||||
* |
|
||||||
*------------------------------------------------------------------------- |
|
||||||
*/ |
|
||||||
|
|
||||||
#include "postgres.h" |
|
||||||
|
|
||||||
#include <float.h> |
|
||||||
#include <math.h> |
|
||||||
|
|
||||||
#include "pgstat.h" |
|
||||||
#include "access/xlog.h" |
|
||||||
#include "access/xlogrecovery.h" |
|
||||||
#include "access/xlogwait.h" |
|
||||||
#include "miscadmin.h" |
|
||||||
#include "storage/latch.h" |
|
||||||
#include "storage/proc.h" |
|
||||||
#include "storage/shmem.h" |
|
||||||
#include "utils/fmgrprotos.h" |
|
||||||
#include "utils/pg_lsn.h" |
|
||||||
#include "utils/snapmgr.h" |
|
||||||
|
|
||||||
static int waitlsn_cmp(const pairingheap_node *a, const pairingheap_node *b, |
|
||||||
void *arg); |
|
||||||
|
|
||||||
struct WaitLSNState *waitLSNState = NULL; |
|
||||||
|
|
||||||
/* Report the amount of shared memory space needed for WaitLSNState. */ |
|
||||||
Size |
|
||||||
WaitLSNShmemSize(void) |
|
||||||
{ |
|
||||||
Size size; |
|
||||||
|
|
||||||
size = offsetof(WaitLSNState, procInfos); |
|
||||||
size = add_size(size, mul_size(MaxBackends, sizeof(WaitLSNProcInfo))); |
|
||||||
return size; |
|
||||||
} |
|
||||||
|
|
||||||
/* Initialize the WaitLSNState in the shared memory. */ |
|
||||||
void |
|
||||||
WaitLSNShmemInit(void) |
|
||||||
{ |
|
||||||
bool found; |
|
||||||
|
|
||||||
waitLSNState = (WaitLSNState *) ShmemInitStruct("WaitLSNState", |
|
||||||
WaitLSNShmemSize(), |
|
||||||
&found); |
|
||||||
if (!found) |
|
||||||
{ |
|
||||||
pg_atomic_init_u64(&waitLSNState->minWaitedLSN, PG_UINT64_MAX); |
|
||||||
pairingheap_initialize(&waitLSNState->waitersHeap, waitlsn_cmp, NULL); |
|
||||||
memset(&waitLSNState->procInfos, 0, MaxBackends * sizeof(WaitLSNProcInfo)); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Comparison function for waitLSN->waitersHeap heap. Waiting processes are |
|
||||||
* ordered by lsn, so that the waiter with smallest lsn is at the top. |
|
||||||
*/ |
|
||||||
static int |
|
||||||
waitlsn_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg) |
|
||||||
{ |
|
||||||
const WaitLSNProcInfo *aproc = pairingheap_const_container(WaitLSNProcInfo, phNode, a); |
|
||||||
const WaitLSNProcInfo *bproc = pairingheap_const_container(WaitLSNProcInfo, phNode, b); |
|
||||||
|
|
||||||
if (aproc->waitLSN < bproc->waitLSN) |
|
||||||
return 1; |
|
||||||
else if (aproc->waitLSN > bproc->waitLSN) |
|
||||||
return -1; |
|
||||||
else |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Update waitLSN->minWaitedLSN according to the current state of |
|
||||||
* waitLSN->waitersHeap. |
|
||||||
*/ |
|
||||||
static void |
|
||||||
updateMinWaitedLSN(void) |
|
||||||
{ |
|
||||||
XLogRecPtr minWaitedLSN = PG_UINT64_MAX; |
|
||||||
|
|
||||||
if (!pairingheap_is_empty(&waitLSNState->waitersHeap)) |
|
||||||
{ |
|
||||||
pairingheap_node *node = pairingheap_first(&waitLSNState->waitersHeap); |
|
||||||
|
|
||||||
minWaitedLSN = pairingheap_container(WaitLSNProcInfo, phNode, node)->waitLSN; |
|
||||||
} |
|
||||||
|
|
||||||
pg_atomic_write_u64(&waitLSNState->minWaitedLSN, minWaitedLSN); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Put the current process into the heap of LSN waiters. |
|
||||||
*/ |
|
||||||
static void |
|
||||||
addLSNWaiter(XLogRecPtr lsn) |
|
||||||
{ |
|
||||||
WaitLSNProcInfo *procInfo = &waitLSNState->procInfos[MyProcNumber]; |
|
||||||
|
|
||||||
LWLockAcquire(WaitLSNLock, LW_EXCLUSIVE); |
|
||||||
|
|
||||||
Assert(!procInfo->inHeap); |
|
||||||
|
|
||||||
procInfo->procno = MyProcNumber; |
|
||||||
procInfo->waitLSN = lsn; |
|
||||||
|
|
||||||
pairingheap_add(&waitLSNState->waitersHeap, &procInfo->phNode); |
|
||||||
procInfo->inHeap = true; |
|
||||||
updateMinWaitedLSN(); |
|
||||||
|
|
||||||
LWLockRelease(WaitLSNLock); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Remove the current process from the heap of LSN waiters if it's there. |
|
||||||
*/ |
|
||||||
static void |
|
||||||
deleteLSNWaiter(void) |
|
||||||
{ |
|
||||||
WaitLSNProcInfo *procInfo = &waitLSNState->procInfos[MyProcNumber]; |
|
||||||
|
|
||||||
LWLockAcquire(WaitLSNLock, LW_EXCLUSIVE); |
|
||||||
|
|
||||||
if (!procInfo->inHeap) |
|
||||||
{ |
|
||||||
LWLockRelease(WaitLSNLock); |
|
||||||
return; |
|
||||||
} |
|
||||||
|
|
||||||
pairingheap_remove(&waitLSNState->waitersHeap, &procInfo->phNode); |
|
||||||
procInfo->inHeap = false; |
|
||||||
updateMinWaitedLSN(); |
|
||||||
|
|
||||||
LWLockRelease(WaitLSNLock); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Remove waiters whose LSN has been replayed from the heap and set their |
|
||||||
* latches. If InvalidXLogRecPtr is given, remove all waiters from the heap |
|
||||||
* and set latches for all waiters. |
|
||||||
*/ |
|
||||||
void |
|
||||||
WaitLSNWakeup(XLogRecPtr currentLSN) |
|
||||||
{ |
|
||||||
int i; |
|
||||||
ProcNumber *wakeUpProcs; |
|
||||||
int numWakeUpProcs = 0; |
|
||||||
|
|
||||||
wakeUpProcs = palloc(sizeof(ProcNumber) * MaxBackends); |
|
||||||
|
|
||||||
LWLockAcquire(WaitLSNLock, LW_EXCLUSIVE); |
|
||||||
|
|
||||||
/*
|
|
||||||
* Iterate the pairing heap of waiting processes till we find LSN not yet |
|
||||||
* replayed. Record the process numbers to wake up, but to avoid holding |
|
||||||
* the lock for too long, send the wakeups only after releasing the lock. |
|
||||||
*/ |
|
||||||
while (!pairingheap_is_empty(&waitLSNState->waitersHeap)) |
|
||||||
{ |
|
||||||
pairingheap_node *node = pairingheap_first(&waitLSNState->waitersHeap); |
|
||||||
WaitLSNProcInfo *procInfo = pairingheap_container(WaitLSNProcInfo, phNode, node); |
|
||||||
|
|
||||||
if (!XLogRecPtrIsInvalid(currentLSN) && |
|
||||||
procInfo->waitLSN > currentLSN) |
|
||||||
break; |
|
||||||
|
|
||||||
wakeUpProcs[numWakeUpProcs++] = procInfo->procno; |
|
||||||
(void) pairingheap_remove_first(&waitLSNState->waitersHeap); |
|
||||||
procInfo->inHeap = false; |
|
||||||
} |
|
||||||
|
|
||||||
updateMinWaitedLSN(); |
|
||||||
|
|
||||||
LWLockRelease(WaitLSNLock); |
|
||||||
|
|
||||||
/*
|
|
||||||
* Set latches for processes, whose waited LSNs are already replayed. As |
|
||||||
* the time consuming operations, we do it this outside of WaitLSNLock. |
|
||||||
* This is actually fine because procLatch isn't ever freed, so we just |
|
||||||
* can potentially set the wrong process' (or no process') latch. |
|
||||||
*/ |
|
||||||
for (i = 0; i < numWakeUpProcs; i++) |
|
||||||
{ |
|
||||||
SetLatch(&GetPGProcByNumber(wakeUpProcs[i])->procLatch); |
|
||||||
} |
|
||||||
pfree(wakeUpProcs); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Delete our item from shmem array if any. |
|
||||||
*/ |
|
||||||
void |
|
||||||
WaitLSNCleanup(void) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* We do a fast-path check of the 'inHeap' flag without the lock. This |
|
||||||
* flag is set to true only by the process itself. So, it's only possible |
|
||||||
* to get a false positive. But that will be eliminated by a recheck |
|
||||||
* inside deleteLSNWaiter(). |
|
||||||
*/ |
|
||||||
if (waitLSNState->procInfos[MyProcNumber].inHeap) |
|
||||||
deleteLSNWaiter(); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Wait using MyLatch till the given LSN is replayed, the postmaster dies or |
|
||||||
* timeout happens. |
|
||||||
*/ |
|
||||||
WaitLSNResult |
|
||||||
WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout) |
|
||||||
{ |
|
||||||
XLogRecPtr currentLSN; |
|
||||||
TimestampTz endtime = 0; |
|
||||||
int wake_events = WL_LATCH_SET | WL_POSTMASTER_DEATH; |
|
||||||
|
|
||||||
/* Shouldn't be called when shmem isn't initialized */ |
|
||||||
Assert(waitLSNState); |
|
||||||
|
|
||||||
/* Should have a valid proc number */ |
|
||||||
Assert(MyProcNumber >= 0 && MyProcNumber < MaxBackends); |
|
||||||
|
|
||||||
if (!RecoveryInProgress()) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Recovery is not in progress. Given that we detected this in the |
|
||||||
* very first check, this procedure was mistakenly called on primary. |
|
||||||
* However, it's possible that standby was promoted concurrently to |
|
||||||
* the procedure call, while target LSN is replayed. So, we still |
|
||||||
* check the last replay LSN before reporting an error. |
|
||||||
*/ |
|
||||||
if (targetLSN <= GetXLogReplayRecPtr(NULL)) |
|
||||||
return WAIT_LSN_RESULT_SUCCESS; |
|
||||||
return WAIT_LSN_RESULT_NOT_IN_RECOVERY; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
/* If target LSN is already replayed, exit immediately */ |
|
||||||
if (targetLSN <= GetXLogReplayRecPtr(NULL)) |
|
||||||
return WAIT_LSN_RESULT_SUCCESS; |
|
||||||
} |
|
||||||
|
|
||||||
if (timeout > 0) |
|
||||||
{ |
|
||||||
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), timeout); |
|
||||||
wake_events |= WL_TIMEOUT; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Add our process to the pairing heap of waiters. It might happen that |
|
||||||
* target LSN gets replayed before we do. Another check at the beginning |
|
||||||
* of the loop below prevents the race condition. |
|
||||||
*/ |
|
||||||
addLSNWaiter(targetLSN); |
|
||||||
|
|
||||||
for (;;) |
|
||||||
{ |
|
||||||
int rc; |
|
||||||
long delay_ms = 0; |
|
||||||
|
|
||||||
/* Recheck that recovery is still in-progress */ |
|
||||||
if (!RecoveryInProgress()) |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* Recovery was ended, but recheck if target LSN was already |
|
||||||
* replayed. See the comment regarding deleteLSNWaiter() below. |
|
||||||
*/ |
|
||||||
deleteLSNWaiter(); |
|
||||||
currentLSN = GetXLogReplayRecPtr(NULL); |
|
||||||
if (targetLSN <= currentLSN) |
|
||||||
return WAIT_LSN_RESULT_SUCCESS; |
|
||||||
return WAIT_LSN_RESULT_NOT_IN_RECOVERY; |
|
||||||
} |
|
||||||
else |
|
||||||
{ |
|
||||||
/* Check if the waited LSN has been replayed */ |
|
||||||
currentLSN = GetXLogReplayRecPtr(NULL); |
|
||||||
if (targetLSN <= currentLSN) |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* If the timeout value is specified, calculate the number of |
|
||||||
* milliseconds before the timeout. Exit if the timeout is already |
|
||||||
* reached. |
|
||||||
*/ |
|
||||||
if (timeout > 0) |
|
||||||
{ |
|
||||||
delay_ms = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), endtime); |
|
||||||
if (delay_ms <= 0) |
|
||||||
break; |
|
||||||
} |
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS(); |
|
||||||
|
|
||||||
rc = WaitLatch(MyLatch, wake_events, delay_ms, |
|
||||||
WAIT_EVENT_WAIT_FOR_WAL_REPLAY); |
|
||||||
|
|
||||||
/*
|
|
||||||
* Emergency bailout if postmaster has died. This is to avoid the |
|
||||||
* necessity for manual cleanup of all postmaster children. |
|
||||||
*/ |
|
||||||
if (rc & WL_POSTMASTER_DEATH) |
|
||||||
ereport(FATAL, |
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN), |
|
||||||
errmsg("terminating connection due to unexpected postmaster exit"), |
|
||||||
errcontext("while waiting for LSN replay"))); |
|
||||||
|
|
||||||
if (rc & WL_LATCH_SET) |
|
||||||
ResetLatch(MyLatch); |
|
||||||
} |
|
||||||
|
|
||||||
/*
|
|
||||||
* Delete our process from the shared memory pairing heap. We might |
|
||||||
* already be deleted by the startup process. The 'inHeap' flag prevents |
|
||||||
* us from the double deletion. |
|
||||||
*/ |
|
||||||
deleteLSNWaiter(); |
|
||||||
|
|
||||||
/*
|
|
||||||
* If we didn't reach the target LSN, we must be exited by timeout. |
|
||||||
*/ |
|
||||||
if (targetLSN > currentLSN) |
|
||||||
return WAIT_LSN_RESULT_TIMEOUT; |
|
||||||
|
|
||||||
return WAIT_LSN_RESULT_SUCCESS; |
|
||||||
} |
|
@ -1,89 +0,0 @@ |
|||||||
/*-------------------------------------------------------------------------
|
|
||||||
* |
|
||||||
* xlogwait.h |
|
||||||
* Declarations for LSN replay waiting routines. |
|
||||||
* |
|
||||||
* Copyright (c) 2024, PostgreSQL Global Development Group |
|
||||||
* |
|
||||||
* src/include/access/xlogwait.h |
|
||||||
* |
|
||||||
*------------------------------------------------------------------------- |
|
||||||
*/ |
|
||||||
#ifndef XLOG_WAIT_H |
|
||||||
#define XLOG_WAIT_H |
|
||||||
|
|
||||||
#include "lib/pairingheap.h" |
|
||||||
#include "postgres.h" |
|
||||||
#include "port/atomics.h" |
|
||||||
#include "storage/procnumber.h" |
|
||||||
#include "storage/spin.h" |
|
||||||
#include "tcop/dest.h" |
|
||||||
|
|
||||||
/*
|
|
||||||
* WaitLSNProcInfo - the shared memory structure representing information |
|
||||||
* about the single process, which may wait for LSN replay. An item of |
|
||||||
* waitLSN->procInfos array. |
|
||||||
*/ |
|
||||||
typedef struct WaitLSNProcInfo |
|
||||||
{ |
|
||||||
/* LSN, which this process is waiting for */ |
|
||||||
XLogRecPtr waitLSN; |
|
||||||
|
|
||||||
/* Process to wake up once the waitLSN is replayed */ |
|
||||||
ProcNumber procno; |
|
||||||
|
|
||||||
/* A pairing heap node for participation in waitLSNState->waitersHeap */ |
|
||||||
pairingheap_node phNode; |
|
||||||
|
|
||||||
/*
|
|
||||||
* A flag indicating that this item is present in |
|
||||||
* waitLSNState->waitersHeap |
|
||||||
*/ |
|
||||||
bool inHeap; |
|
||||||
} WaitLSNProcInfo; |
|
||||||
|
|
||||||
/*
|
|
||||||
* WaitLSNState - the shared memory state for the replay LSN waiting facility. |
|
||||||
*/ |
|
||||||
typedef struct WaitLSNState |
|
||||||
{ |
|
||||||
/*
|
|
||||||
* The minimum LSN value some process is waiting for. Used for the |
|
||||||
* fast-path checking if we need to wake up any waiters after replaying a |
|
||||||
* WAL record. Could be read lock-less. Update protected by WaitLSNLock. |
|
||||||
*/ |
|
||||||
pg_atomic_uint64 minWaitedLSN; |
|
||||||
|
|
||||||
/*
|
|
||||||
* A pairing heap of waiting processes order by LSN values (least LSN is |
|
||||||
* on top). Protected by WaitLSNLock. |
|
||||||
*/ |
|
||||||
pairingheap waitersHeap; |
|
||||||
|
|
||||||
/*
|
|
||||||
* An array with per-process information, indexed by the process number. |
|
||||||
* Protected by WaitLSNLock. |
|
||||||
*/ |
|
||||||
WaitLSNProcInfo procInfos[FLEXIBLE_ARRAY_MEMBER]; |
|
||||||
} WaitLSNState; |
|
||||||
|
|
||||||
/*
 * Possible outcomes of WaitForLSNReplay().
 */
typedef enum
{
	/* Target LSN is reached */
	WAIT_LSN_RESULT_SUCCESS,

	/* Timeout occurred */
	WAIT_LSN_RESULT_TIMEOUT,

	/* Recovery ended before or during our wait */
	WAIT_LSN_RESULT_NOT_IN_RECOVERY,
} WaitLSNResult;
|
||||||
|
|
||||||
extern PGDLLIMPORT WaitLSNState *waitLSNState; |
|
||||||
|
|
||||||
extern Size WaitLSNShmemSize(void); |
|
||||||
extern void WaitLSNShmemInit(void); |
|
||||||
extern void WaitLSNWakeup(XLogRecPtr currentLSN); |
|
||||||
extern void WaitLSNCleanup(void); |
|
||||||
extern WaitLSNResult WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout); |
|
||||||
|
|
||||||
#endif /* XLOG_WAIT_H */ |
|
@ -1,225 +0,0 @@ |
|||||||
# Checks waiting for the lsn replay on standby using
# pg_wal_replay_wait() procedure.
use strict;
use warnings FATAL => 'all';

use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

# Initialize primary node
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(allows_streaming => 1);
$node_primary->start;

# Add some content and take a backup
$node_primary->safe_psql('postgres',
	"CREATE TABLE wait_test AS SELECT generate_series(1,10) AS a");
my $backup_name = 'my_backup';
$node_primary->backup($backup_name);

# Create a streaming standby with a 1 second delay from the backup
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
my $delay = 1;
$node_standby->init_from_backup($node_primary, $backup_name,
	has_streaming => 1);
$node_standby->append_conf(
	'postgresql.conf', qq[
	recovery_min_apply_delay = '${delay}s'
]);
$node_standby->start;

# 1. Make sure that pg_wal_replay_wait() works: add new content to
# primary and memorize primary's insert LSN, then wait for that LSN to be
# replayed on standby.
$node_primary->safe_psql('postgres',
	"INSERT INTO wait_test VALUES (generate_series(11, 20))");
my $lsn1 =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
my $output = $node_standby->safe_psql(
	'postgres', qq[
	CALL pg_wal_replay_wait('${lsn1}', 1000000);
	SELECT pg_lsn_cmp(pg_last_wal_replay_lsn(), '${lsn1}'::pg_lsn);
]);

# Make sure the current LSN on standby is at least as big as the LSN we
# observed on the primary before.
ok($output >= 0,
	"standby reached the same LSN as primary after pg_wal_replay_wait()");

# 2. Check that new data is visible after calling pg_wal_replay_wait()
$node_primary->safe_psql('postgres',
	"INSERT INTO wait_test VALUES (generate_series(21, 30))");
my $lsn2 =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
$output = $node_standby->safe_psql(
	'postgres', qq[
	CALL pg_wal_replay_wait('${lsn2}');
	SELECT count(*) FROM wait_test;
]);

# Make sure the count(*) on standby reflects the recent changes on primary
ok($output eq 30, "standby reached the same LSN as primary");

# 3. Check that waiting for unreachable LSN triggers the timeout.  The
# unreachable LSN must be well in advance.  So WAL records issued by
# the concurrent autovacuum could not affect that.
my $lsn3 =
  $node_primary->safe_psql('postgres',
	"SELECT pg_current_wal_insert_lsn() + 10000000000");
my $stderr;
$node_standby->safe_psql('postgres',
	"CALL pg_wal_replay_wait('${lsn2}', 10);");
$node_standby->psql(
	'postgres',
	"CALL pg_wal_replay_wait('${lsn3}', 1000);",
	stderr => \$stderr);
ok( $stderr =~ /timed out while waiting for target LSN/,
	"get timeout on waiting for unreachable LSN");

$output = $node_standby->safe_psql(
	'postgres', qq[
	CALL pg_wal_replay_wait('${lsn2}', 10, true);
	SELECT pg_wal_replay_wait_status();]);
ok( $output eq "success",
	"pg_wal_replay_wait_status() returns correct status after successful waiting"
);
$output = $node_standby->safe_psql(
	'postgres', qq[
	CALL pg_wal_replay_wait('${lsn3}', 10, true);
	SELECT pg_wal_replay_wait_status();]);
ok($output eq "timeout",
	"pg_wal_replay_wait_status() returns correct status after timeout");

# 4. Check that pg_wal_replay_wait() triggers an error if called on primary,
# within another function, or inside a transaction with an isolation level
# higher than READ COMMITTED.

$node_primary->psql(
	'postgres',
	"CALL pg_wal_replay_wait('${lsn3}');",
	stderr => \$stderr);
ok( $stderr =~ /recovery is not in progress/,
	"get an error when running on the primary");

# The transaction below runs at REPEATABLE READ, i.e. at a level higher than
# READ COMMITTED; the test description states exactly that.
$node_standby->psql(
	'postgres',
	"BEGIN ISOLATION LEVEL REPEATABLE READ; CALL pg_wal_replay_wait('${lsn3}');",
	stderr => \$stderr);
ok( $stderr =~
	  /pg_wal_replay_wait\(\) must be only called without an active or registered snapshot/,
	"get an error when running in a transaction with an isolation level higher than READ COMMITTED"
);

$node_primary->safe_psql(
	'postgres', qq[
CREATE FUNCTION pg_wal_replay_wait_wrap(target_lsn pg_lsn) RETURNS void AS \$\$
  BEGIN
    CALL pg_wal_replay_wait(target_lsn);
  END
\$\$
LANGUAGE plpgsql;
]);

$node_primary->wait_for_catchup($node_standby);
$node_standby->psql(
	'postgres',
	"SELECT pg_wal_replay_wait_wrap('${lsn3}');",
	stderr => \$stderr);
ok( $stderr =~
	  /pg_wal_replay_wait\(\) must be only called without an active or registered snapshot/,
	"get an error when running within another function");

# 5. Also, check the scenario of multiple LSN waiters.  We make 5 background
# psql sessions each waiting for a corresponding insertion.  When waiting is
# finished, each session calls log_count(), which writes a log message if at
# least as many rows are visible as there should be.
$node_primary->safe_psql(
	'postgres', qq[
CREATE FUNCTION log_count(i int) RETURNS void AS \$\$
  DECLARE
    count int;
  BEGIN
    SELECT count(*) FROM wait_test INTO count;
    IF count >= 31 + i THEN
      RAISE LOG 'count %', i;
    END IF;
  END
\$\$
LANGUAGE plpgsql;
]);
$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_pause();");
my @psql_sessions;
for (my $i = 0; $i < 5; $i++)
{
	$node_primary->safe_psql('postgres',
		"INSERT INTO wait_test VALUES (${i});");
	my $lsn =
	  $node_primary->safe_psql('postgres',
		"SELECT pg_current_wal_insert_lsn()");
	$psql_sessions[$i] = $node_standby->background_psql('postgres');
	$psql_sessions[$i]->query_until(
		qr/start/, qq[
		\\echo start
		CALL pg_wal_replay_wait('${lsn}');
		SELECT log_count(${i});
	]);
}
my $log_offset = -s $node_standby->logfile;
$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume();");
for (my $i = 0; $i < 5; $i++)
{
	$node_standby->wait_for_log("count ${i}", $log_offset);
	$psql_sessions[$i]->quit;
}

ok(1, 'multiple LSN waiters reported consistent data');

# 6. Check that the standby promotion terminates the wait on LSN.  Start
# waiting for an unreachable LSN then promote.  Check the log for the relevant
# error message.  Also, check that waiting for already replayed LSN doesn't
# cause an error even after promotion.
my $lsn4 =
  $node_primary->safe_psql('postgres',
	"SELECT pg_current_wal_insert_lsn() + 10000000000");
my $lsn5 =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
my $psql_session = $node_standby->background_psql('postgres');
$psql_session->query_until(
	qr/start/, qq[
	\\echo start
	CALL pg_wal_replay_wait('${lsn4}');
]);

# Make sure standby will be promoted at least at the primary insert LSN we
# have just observed.  Use pg_switch_wal() to force the insert LSN to be
# written then wait for standby to catchup.
$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();');
$node_primary->wait_for_catchup($node_standby);

$log_offset = -s $node_standby->logfile;
$node_standby->promote;
$node_standby->wait_for_log('recovery is not in progress', $log_offset);

ok(1, 'got error after standby promote');

$node_standby->safe_psql('postgres', "CALL pg_wal_replay_wait('${lsn5}');");

ok(1, 'wait for already replayed LSN exits immediately even after promotion');

$output = $node_standby->safe_psql(
	'postgres', qq[
	CALL pg_wal_replay_wait('${lsn4}', 10, true);
	SELECT pg_wal_replay_wait_status();]);
ok( $output eq "not in recovery",
	"pg_wal_replay_wait_status() returns correct status after standby promotion"
);

$node_standby->stop;
$node_primary->stop;

# If we send \q with $psql_session->quit the command can be sent to the session
# already closed. So \q is in initial script, here we only finish IPC::Run.
$psql_session->{run}->finish;

done_testing();
|
Loading…
Reference in new issue