You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
postgres/src/backend/utils/time/snapmgr.c

562 lines
14 KiB

/*-------------------------------------------------------------------------
* snapmgr.c
* PostgreSQL snapshot manager
*
* We keep track of snapshots in two ways: those "registered" by resowner.c,
* and the "active snapshot" stack. All snapshots in either of them live in
* persistent memory. When a snapshot is no longer in any of these lists
* (tracked by separate refcounts on each snapshot), its memory can be freed.
*
* These arrangements let us reset MyProc->xmin when there are no snapshots
* referenced by this transaction. (One possible improvement would be to be
* able to advance Xmin when the snapshot with the earliest Xmin is no longer
* referenced. That's a bit harder though, it requires more locking, and
* anyway it should be rather uncommon to keep snapshots referenced for too
* long.)
*
*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.16 2010/09/11 18:38:56 joe Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/transam.h"
#include "access/xact.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
/*
* CurrentSnapshot points to the only snapshot taken in transaction-snapshot
* mode, and to the latest one taken in a read-committed transaction.
* SecondarySnapshot is a snapshot that's always up-to-date as of the current
* instant, even in transaction-snapshot mode. It should only be used for
* special-purpose code (say, RI checking.)
*
* These SnapshotData structs are static to simplify memory allocation
* (see the hack in GetSnapshotData to avoid repeated malloc/free).
*/
static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
/* Pointers to valid snapshots */
static Snapshot CurrentSnapshot = NULL;
static Snapshot SecondarySnapshot = NULL;
/*
* These are updated by GetSnapshotData. We initialize them this way
* for the convenience of TransactionIdIsInProgress: even in bootstrap
* mode, we don't want it to say that BootstrapTransactionId is in progress.
*
* RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no
* one tries to use a stale value. Readers should ensure that it has been set
* to something else before using it.
*/
TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId;
TransactionId RecentGlobalXmin = InvalidTransactionId;
/*
* Elements of the active snapshot stack.
*
* Each element here accounts for exactly one active_count on SnapshotData.
*
* NB: the code assumes that elements in this list are in non-increasing
* order of as_level; also, the list must be NULL-terminated.
*/
typedef struct ActiveSnapshotElt
{
Snapshot as_snap;
int as_level;
struct ActiveSnapshotElt *as_next;
} ActiveSnapshotElt;
/* Top of the stack of active snapshots */
static ActiveSnapshotElt *ActiveSnapshot = NULL;
/*
* How many snapshots is resowner.c tracking for us?
*
* Note: for now, a simple counter is enough. However, if we ever want to be
* smarter about advancing our MyProc->xmin we will need to be more
* sophisticated about this, perhaps keeping our own list of snapshots.
*/
static int RegisteredSnapshots = 0;
/* first GetTransactionSnapshot call in a transaction? */
bool FirstSnapshotSet = false;
/*
* Remembers whether this transaction registered a transaction snapshot at
* start. We cannot trust FirstSnapshotSet in combination with
* IsolationUsesXactSnapshot(), because GUC may be reset before us.
*/
static bool registered_xact_snapshot = false;
static Snapshot CopySnapshot(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction.
*
* Note that the return value may point at static storage that will be modified
* by future calls and by CommandCounterIncrement(). Callers should call
* RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
* used very long.
*/
Snapshot
GetTransactionSnapshot(void)
{
/* First call in transaction? */
if (!FirstSnapshotSet)
{
Assert(RegisteredSnapshots == 0);
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
FirstSnapshotSet = true;
/*
* In transaction-snapshot mode, the first snapshot must live until
* end of xact regardless of what the caller does with it, so we must
* register it internally here and unregister it at end of xact.
*/
if (IsolationUsesXactSnapshot())
{
CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot,
TopTransactionResourceOwner);
registered_xact_snapshot = true;
}
return CurrentSnapshot;
}
if (IsolationUsesXactSnapshot())
return CurrentSnapshot;
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
return CurrentSnapshot;
}
/*
* GetLatestSnapshot
* Get a snapshot that is up-to-date as of the current instant,
* even if we are executing in transaction-snapshot mode.
*/
Snapshot
GetLatestSnapshot(void)
{
/* Should not be first call in transaction */
if (!FirstSnapshotSet)
elog(ERROR, "no snapshot has been set");
SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
return SecondarySnapshot;
}
/*
* SnapshotSetCommandId
* Propagate CommandCounterIncrement into the static snapshots, if set
*/
void
SnapshotSetCommandId(CommandId curcid)
{
if (!FirstSnapshotSet)
return;
if (CurrentSnapshot)
CurrentSnapshot->curcid = curcid;
if (SecondarySnapshot)
SecondarySnapshot->curcid = curcid;
}
/*
* CopySnapshot
* Copy the given snapshot.
*
* The copy is palloc'd in TopTransactionContext and has initial refcounts set
* to 0. The returned snapshot has the copied flag set.
*/
static Snapshot
CopySnapshot(Snapshot snapshot)
{
Snapshot newsnap;
Size subxipoff;
Size size;
Assert(snapshot != InvalidSnapshot);
/* We allocate any XID arrays needed in the same palloc block. */
size = subxipoff = sizeof(SnapshotData) +
snapshot->xcnt * sizeof(TransactionId);
if (snapshot->subxcnt > 0)
size += snapshot->subxcnt * sizeof(TransactionId);
newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
memcpy(newsnap, snapshot, sizeof(SnapshotData));
newsnap->regd_count = 0;
newsnap->active_count = 0;
newsnap->copied = true;
/* setup XID array */
if (snapshot->xcnt > 0)
{
newsnap->xip = (TransactionId *) (newsnap + 1);
memcpy(newsnap->xip, snapshot->xip,
snapshot->xcnt * sizeof(TransactionId));
}
else
newsnap->xip = NULL;
Allow read only connections during recovery, known as Hot Standby. Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record. New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far. This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required. Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit. Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
16 years ago
/*
* Setup subXID array. Don't bother to copy it if it had overflowed,
* though, because it's not used anywhere in that case. Except if it's a
* snapshot taken during recovery; all the top-level XIDs are in subxip as
* well in that case, so we mustn't lose them.
Allow read only connections during recovery, known as Hot Standby. Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record. New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far. This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required. Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit. Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
16 years ago
*/
if (snapshot->subxcnt > 0 &&
(!snapshot->suboverflowed || snapshot->takenDuringRecovery))
{
newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
memcpy(newsnap->subxip, snapshot->subxip,
snapshot->subxcnt * sizeof(TransactionId));
}
else
newsnap->subxip = NULL;
return newsnap;
}
/*
* FreeSnapshot
* Free the memory associated with a snapshot.
*/
static void
FreeSnapshot(Snapshot snapshot)
{
Assert(snapshot->regd_count == 0);
Assert(snapshot->active_count == 0);
Assert(snapshot->copied);
pfree(snapshot);
}
/*
* PushActiveSnapshot
* Set the given snapshot as the current active snapshot
*
* If the passed snapshot is a statically-allocated one, or it is possibly
* subject to a future command counter update, create a new long-lived copy
* with active refcount=1. Otherwise, only increment the refcount.
*/
void
PushActiveSnapshot(Snapshot snap)
{
ActiveSnapshotElt *newactive;
Assert(snap != InvalidSnapshot);
newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
/*
* Checking SecondarySnapshot is probably useless here, but it seems
* better to be sure.
*/
if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied)
newactive->as_snap = CopySnapshot(snap);
else
newactive->as_snap = snap;
newactive->as_next = ActiveSnapshot;
newactive->as_level = GetCurrentTransactionNestLevel();
newactive->as_snap->active_count++;
ActiveSnapshot = newactive;
}
/*
* PushUpdatedSnapshot
* As above, except we set the snapshot's CID to the current CID.
*/
void
PushUpdatedSnapshot(Snapshot snapshot)
{
Snapshot newsnap;
/*
* We cannot risk modifying a snapshot that's possibly already used
* elsewhere, so make a new copy to scribble on.
*/
newsnap = CopySnapshot(snapshot);
newsnap->curcid = GetCurrentCommandId(false);
PushActiveSnapshot(newsnap);
}
/*
* PopActiveSnapshot
*
* Remove the topmost snapshot from the active snapshot stack, decrementing the
* reference count, and free it if this was the last reference.
*/
void
PopActiveSnapshot(void)
{
ActiveSnapshotElt *newstack;
newstack = ActiveSnapshot->as_next;
Assert(ActiveSnapshot->as_snap->active_count > 0);
ActiveSnapshot->as_snap->active_count--;
if (ActiveSnapshot->as_snap->active_count == 0 &&
ActiveSnapshot->as_snap->regd_count == 0)
FreeSnapshot(ActiveSnapshot->as_snap);
pfree(ActiveSnapshot);
ActiveSnapshot = newstack;
SnapshotResetXmin();
}
/*
* GetActiveSnapshot
* Return the topmost snapshot in the Active stack.
*/
Snapshot
GetActiveSnapshot(void)
{
Assert(ActiveSnapshot != NULL);
return ActiveSnapshot->as_snap;
}
/*
* ActiveSnapshotSet
* Return whether there is at least one snapshot in the Active stack
*/
bool
ActiveSnapshotSet(void)
{
return ActiveSnapshot != NULL;
}
/*
* RegisterSnapshot
* Register a snapshot as being in use by the current resource owner
*
* If InvalidSnapshot is passed, it is not registered.
*/
Snapshot
RegisterSnapshot(Snapshot snapshot)
{
if (snapshot == InvalidSnapshot)
return InvalidSnapshot;
return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
}
/*
* RegisterSnapshotOnOwner
* As above, but use the specified resource owner
*/
Snapshot
RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
{
Snapshot snap;
if (snapshot == InvalidSnapshot)
return InvalidSnapshot;
/* Static snapshot? Create a persistent copy */
snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
/* and tell resowner.c about it */
ResourceOwnerEnlargeSnapshots(owner);
snap->regd_count++;
ResourceOwnerRememberSnapshot(owner, snap);
RegisteredSnapshots++;
return snap;
}
/*
* UnregisterSnapshot
*
* Decrement the reference count of a snapshot, remove the corresponding
* reference from CurrentResourceOwner, and free the snapshot if no more
* references remain.
*/
void
UnregisterSnapshot(Snapshot snapshot)
{
if (snapshot == NULL)
return;
UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
}
/*
* UnregisterSnapshotFromOwner
* As above, but use the specified resource owner
*/
void
UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
{
if (snapshot == NULL)
return;
Assert(snapshot->regd_count > 0);
Assert(RegisteredSnapshots > 0);
ResourceOwnerForgetSnapshot(owner, snapshot);
RegisteredSnapshots--;
if (--snapshot->regd_count == 0 && snapshot->active_count == 0)
{
FreeSnapshot(snapshot);
SnapshotResetXmin();
}
}
/*
* SnapshotResetXmin
*
* If there are no more snapshots, we can reset our PGPROC->xmin to InvalidXid.
* Note we can do this without locking because we assume that storing an Xid
* is atomic.
*/
static void
SnapshotResetXmin(void)
{
if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL)
MyProc->xmin = InvalidTransactionId;
}
/*
* AtSubCommit_Snapshot
*/
void
AtSubCommit_Snapshot(int level)
{
ActiveSnapshotElt *active;
/*
* Relabel the active snapshots set in this subtransaction as though they
* are owned by the parent subxact.
*/
for (active = ActiveSnapshot; active != NULL; active = active->as_next)
{
if (active->as_level < level)
break;
active->as_level = level - 1;
}
}
/*
* AtSubAbort_Snapshot
* Clean up snapshots after a subtransaction abort
*/
void
AtSubAbort_Snapshot(int level)
{
/* Forget the active snapshots set by this subtransaction */
while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
{
ActiveSnapshotElt *next;
next = ActiveSnapshot->as_next;
/*
* Decrement the snapshot's active count. If it's still registered or
* marked as active by an outer subtransaction, we can't free it yet.
*/
Assert(ActiveSnapshot->as_snap->active_count >= 1);
ActiveSnapshot->as_snap->active_count -= 1;
if (ActiveSnapshot->as_snap->active_count == 0 &&
ActiveSnapshot->as_snap->regd_count == 0)
FreeSnapshot(ActiveSnapshot->as_snap);
/* and free the stack element */
pfree(ActiveSnapshot);
ActiveSnapshot = next;
}
SnapshotResetXmin();
}
/*
* AtEarlyCommit_Snapshot
*
* Snapshot manager's cleanup function, to be called on commit, before
* doing resowner.c resource release.
*/
void
AtEarlyCommit_Snapshot(void)
{
/*
* In transaction-snapshot mode we must unregister our private refcount
* to the transaction-snapshot.
*/
if (registered_xact_snapshot)
UnregisterSnapshotFromOwner(CurrentSnapshot,
TopTransactionResourceOwner);
registered_xact_snapshot = false;
}
/*
* AtEOXact_Snapshot
* Snapshot manager's cleanup function for end of transaction
*/
void
AtEOXact_Snapshot(bool isCommit)
{
/* On commit, complain about leftover snapshots */
if (isCommit)
{
ActiveSnapshotElt *active;
if (RegisteredSnapshots != 0)
elog(WARNING, "%d registered snapshots seem to remain after cleanup",
RegisteredSnapshots);
/* complain about unpopped active snapshots */
for (active = ActiveSnapshot; active != NULL; active = active->as_next)
elog(WARNING, "snapshot %p still active", active);
}
/*
* And reset our state. We don't need to free the memory explicitly --
* it'll go away with TopTransactionContext.
*/
ActiveSnapshot = NULL;
RegisteredSnapshots = 0;
CurrentSnapshot = NULL;
SecondarySnapshot = NULL;
FirstSnapshotSet = false;
registered_xact_snapshot = false;
}