@ -2,11 +2,28 @@
* snapmgr . c
* PostgreSQL snapshot manager
*
* We keep track of snapshots in two ways : the " registered snapshots " list ,
* and the " active snapshot " stack . All snapshots in any of them is supposed
* to be in persistent memory . When a snapshot is no longer in any of these
* lists ( tracked by separate refcounts of each snapshot ) , its memory can be
* freed .
*
* These arrangements let us reset MyProc - > xmin when there are no snapshots
* referenced by this transaction . ( One possible improvement would be to be
* able to advance Xmin when the snapshot with the earliest Xmin is no longer
* referenced . That ' s a bit harder though , it requires more locking , and
* anyway it should be rather uncommon to keep snapshots referenced for too
* long . )
*
* Note : parts of this code could probably be replaced by appropriate use
* of resowner . c .
*
*
* Portions Copyright ( c ) 1996 - 2008 , PostgreSQL Global Development Group
* Portions Copyright ( c ) 1994 , Regents of the University of California
*
* IDENTIFICATION
* $ PostgreSQL : pgsql / src / backend / utils / time / snapmgr . c , v 1.1 2008 / 03 / 26 18 : 48 : 59 alvherre Exp $
* $ PostgreSQL : pgsql / src / backend / utils / time / snapmgr . c , v 1.2 2008 / 05 / 12 20 : 02 : 02 alvherre Exp $
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
@ -14,28 +31,30 @@
# include "access/xact.h"
# include "access/transam.h"
# include "storage/proc.h"
# include "storage/procarray.h"
# include "utils/memutils.h"
# include "utils/snapmgr.h"
# include "utils/tqual.h"
# include "utils/memutils.h"
/*
* CurrentSnapshot points to the only snapshot taken in a serializable
* transaction , and to the latest one taken in a read - committed transaction .
* SecondarySnapshot is a snapshot that ' s always up - to - date as of the current
* instant , even on a serializable transaction . It should only be used for
* special - purpose code ( say , RI checking . )
*
* These SnapshotData structs are static to simplify memory allocation
* ( see the hack in GetSnapshotData to avoid repeated malloc / free ) .
*/
static SnapshotData SerializableSnapshotData = { HeapTupleSatisfiesMVCC } ;
static SnapshotData LatestSnapshotData = { HeapTupleSatisfiesMVCC } ;
/* Externally visible pointers to valid snapshots: */
Snapshot SerializableSnapshot = NULL ;
Snapshot LatestSnapshot = NULL ;
static SnapshotData CurrentSnapshotData = { HeapTupleSatisfiesMVCC } ;
static SnapshotData SecondarySnapshotData = { HeapTupleSatisfiesMVCC } ;
/*
* This pointer is not maintained by this module , but it ' s convenient
* to declare it here anyway . Callers typically assign a copy of
* GetTransactionSnapshot ' s result to ActiveSnapshot .
*/
Snapshot ActiveSnapshot = NULL ;
/* Pointers to valid snapshots */
static Snapshot CurrentSnapshot = NULL ;
static Snapshot SecondarySnapshot = NULL ;
/*
* These are updated by GetSnapshotData . We initialize them this way
@ -46,35 +65,106 @@ TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId ;
TransactionId RecentGlobalXmin = FirstNormalTransactionId ;
/*
* Elements of the list of registered snapshots .
*
* Note that we keep refcounts both here and in SnapshotData . This is because
* the same snapshot may be registered more than once in a subtransaction , and
* if a subxact aborts we want to be able to substract the correct amount of
* counts from SnapshotData . ( Another approach would be keeping one
* RegdSnapshotElt each time a snapshot is registered , but that seems
* unnecessary wastage . )
*
* NB : the code assumes that elements in this list are in non - increasing
* order of s_level ; also , the list must be NULL - terminated .
*/
typedef struct RegdSnapshotElt
{
Snapshot s_snap ;
uint32 s_count ;
int s_level ;
struct RegdSnapshotElt * s_next ;
} RegdSnapshotElt ;
/*
* Elements of the active snapshot stack .
*
* It ' s not necessary to keep a refcount like we do for the registered list ;
* each element here accounts for exactly one active_count on SnapshotData .
* We cannot condense them like we do for RegdSnapshotElt because it would mess
* up the order of entries in the stack .
*
* NB : the code assumes that elements in this list are in non - increasing
* order of as_level ; also , the list must be NULL - terminated .
*/
typedef struct ActiveSnapshotElt
{
Snapshot as_snap ;
int as_level ;
struct ActiveSnapshotElt * as_next ;
} ActiveSnapshotElt ;
/* Head of the list of registered snapshots */
static RegdSnapshotElt * RegisteredSnapshotList = NULL ;
/* Top of the stack of active snapshots */
static ActiveSnapshotElt * ActiveSnapshot = NULL ;
/* first GetTransactionSnapshot call in a transaction? */
bool FirstSnapshotSet = false ;
/*
* Remembers whether this transaction registered a serializable snapshot at
* start . We cannot trust FirstSnapshotSet in combination with
* IsXactIsoLevelSerializable , because GUC may be reset before us .
*/
static bool registered_serializable = false ;
static Snapshot CopySnapshot ( Snapshot snapshot ) ;
static void FreeSnapshot ( Snapshot snapshot ) ;
static void SnapshotResetXmin ( void ) ;
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction .
*
* The SerializableSnapshot is the first one taken in a transaction .
* In serializable mode we just use that one throughout the transaction .
* In read - committed mode , we take a new snapshot each time we are called .
*
* Note that the return value points at static storage that will be modified
* by future calls and by CommandCounterIncrement ( ) . Callers should copy
* the result with CopySnapshot ( ) if it is to be used very long .
* Note that the return value may point at static storage that will be modified
* by future calls and by CommandCounterIncrement ( ) . Callers should call
* RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
* used very long .
*/
Snapshot
GetTransactionSnapshot ( void )
{
/* First call in transaction? */
if ( SerializableSnapshot = = NULL )
if ( ! FirstSnapshotSet )
{
SerializableSnapshot = GetSnapshotData ( & SerializableSnapshotData , true ) ;
return SerializableSnapshot ;
CurrentSnapshot = GetSnapshotData ( & CurrentSnapshotData ) ;
FirstSnapshotSet = true ;
/*
* In serializable mode , the first snapshot must live until end of xact
* regardless of what the caller does with it , so we must register it
* internally here and unregister it at end of xact .
*/
if ( IsXactIsoLevelSerializable )
{
CurrentSnapshot = RegisterSnapshot ( CurrentSnapshot ) ;
registered_serializable = true ;
}
return CurrentSnapshot ;
}
if ( IsXactIsoLevelSerializable )
return SerializableSnapshot ;
return Current Snapshot;
LatestSnapshot = GetSnapshotData ( & LatestSnapshotData , false ) ;
Curren tSnapshot = GetSnapshotData ( & CurrentSnapshotData ) ;
return LatestSnapshot ;
return Curren tSnapshot;
}
/*
@ -86,36 +176,59 @@ Snapshot
GetLatestSnapshot ( void )
{
/* Should not be first call in transaction */
if ( SerializableSnapshot = = NULL )
if ( ! FirstSnapshotSet )
elog ( ERROR , " no snapshot has been set " ) ;
Latest Snapshot = GetSnapshotData ( & LatestSnapshotData , false ) ;
Secondary Snapshot = GetSnapshotData ( & SecondarySnapshotData ) ;
return LatestSnapshot ;
return SecondarySnapshot ;
}
/*
* SnapshotSetCommandId
* Propagate CommandCounterIncrement into the static snapshots , if set
*/
void
SnapshotSetCommandId ( CommandId curcid )
{
if ( ! FirstSnapshotSet )
return ;
if ( CurrentSnapshot )
CurrentSnapshot - > curcid = curcid ;
if ( SecondarySnapshot )
SecondarySnapshot - > curcid = curcid ;
}
/*
* CopySnapshot
* Copy the given snapshot .
*
* The copy is palloc ' d in the current memory context .
* The copy is palloc ' d in TopTransactionContext and has initial refcounts set
* to 0. The returned snapshot has the copied flag set .
*/
Snapshot
static Snapshot
CopySnapshot ( Snapshot snapshot )
{
Snapshot newsnap ;
Size subxipoff ;
Size size ;
Assert ( snapshot ! = InvalidSnapshot ) ;
/* We allocate any XID arrays needed in the same palloc block. */
size = subxipoff = sizeof ( SnapshotData ) +
snapshot - > xcnt * sizeof ( TransactionId ) ;
if ( snapshot - > subxcnt > 0 )
size + = snapshot - > subxcnt * sizeof ( TransactionId ) ;
newsnap = ( Snapshot ) palloc ( size ) ;
newsnap = ( Snapshot ) MemoryContextAlloc ( TopTransactionContext , size ) ;
memcpy ( newsnap , snapshot , sizeof ( SnapshotData ) ) ;
newsnap - > regd_count = 0 ;
newsnap - > active_count = 0 ;
newsnap - > copied = true ;
/* setup XID array */
if ( snapshot - > xcnt > 0 )
{
@ -141,32 +254,389 @@ CopySnapshot(Snapshot snapshot)
/*
* FreeSnapshot
* Free a snapshot previously copied with CopySnapshot .
* Free the memory associated with a snapshot .
*/
static void
FreeSnapshot ( Snapshot snapshot )
{
Assert ( snapshot - > regd_count = = 0 ) ;
Assert ( snapshot - > active_count = = 0 ) ;
pfree ( snapshot ) ;
}
/*
* PushActiveSnapshot
* Set the given snapshot as the current active snapshot
*
* This is currently identical to pfree , but is provided for cleanliness .
* If this is the first use of this snapshot , create a new long - lived copy with
* active refcount = 1. Otherwise , only increment the refcount .
*/
void
PushActiveSnapshot ( Snapshot snap )
{
ActiveSnapshotElt * newactive ;
Assert ( snap ! = InvalidSnapshot ) ;
newactive = MemoryContextAlloc ( TopTransactionContext , sizeof ( ActiveSnapshotElt ) ) ;
/* Static snapshot? Create a persistent copy */
newactive - > as_snap = snap - > copied ? snap : CopySnapshot ( snap ) ;
newactive - > as_next = ActiveSnapshot ;
newactive - > as_level = GetCurrentTransactionNestLevel ( ) ;
newactive - > as_snap - > active_count + + ;
ActiveSnapshot = newactive ;
}
/*
* PushUpdatedSnapshot
* As above , except we set the snapshot ' s CID to the current CID .
*/
void
PushUpdatedSnapshot ( Snapshot snapshot )
{
Snapshot newsnap ;
/*
* We cannot risk modifying a snapshot that ' s possibly already used
* elsewhere , so make a new copy to scribble on .
*/
newsnap = CopySnapshot ( snapshot ) ;
newsnap - > curcid = GetCurrentCommandId ( false ) ;
PushActiveSnapshot ( newsnap ) ;
}
/*
* PopActiveSnapshot
*
* Do * not * apply this to the results of GetTransactionSnapshot or
* GetLatestSnapshot , since those are just static structs .
* Remove the topmost snapshot from the active snapshot stack , decrementing the
* reference count , and free it if this was the last reference .
*/
void
FreeSnapshot ( Snapshot snapshot )
PopActiveSnapshot ( void )
{
pfree ( snapshot ) ;
ActiveSnapshotElt * newstack ;
newstack = ActiveSnapshot - > as_next ;
Assert ( ActiveSnapshot - > as_snap - > active_count > 0 ) ;
ActiveSnapshot - > as_snap - > active_count - - ;
if ( ActiveSnapshot - > as_snap - > active_count = = 0 & &
ActiveSnapshot - > as_snap - > regd_count = = 0 )
FreeSnapshot ( ActiveSnapshot - > as_snap ) ;
pfree ( ActiveSnapshot ) ;
ActiveSnapshot = newstack ;
SnapshotResetXmin ( ) ;
}
/*
* FreeXactSnapshot
* Free snapshot ( s ) at end of transaction .
* GetActiveSnapshot
* Return the topmost snapshot in the Active stack .
*/
Snapshot
GetActiveSnapshot ( void )
{
Assert ( ActiveSnapshot ! = NULL ) ;
return ActiveSnapshot - > as_snap ;
}
/*
* ActiveSnapshotSet
* Return whether there is at least one snapsho in the Active stack
*/
bool
ActiveSnapshotSet ( void )
{
return ActiveSnapshot ! = NULL ;
}
/*
* RegisterSnapshot
* Register a snapshot as being in use
*
* If InvalidSnapshot is passed , it is not registered .
*/
Snapshot
RegisterSnapshot ( Snapshot snapshot )
{
RegdSnapshotElt * elt ;
RegdSnapshotElt * newhead ;
int level ;
if ( snapshot = = InvalidSnapshot )
return InvalidSnapshot ;
level = GetCurrentTransactionNestLevel ( ) ;
/*
* If there ' s already an item in the list for the same snapshot and the
* same subxact nest level , increment its refcounts . Otherwise create a
* new one .
*/
for ( elt = RegisteredSnapshotList ; elt ! = NULL ; elt = elt - > s_next )
{
if ( elt - > s_level < level )
break ;
if ( elt - > s_snap = = snapshot & & elt - > s_level = = level )
{
elt - > s_snap - > regd_count + + ;
elt - > s_count + + ;
return elt - > s_snap ;
}
}
/*
* Create the new list element . If it ' s not been copied into persistent
* memory already , we must do so ; otherwise we can just increment the
* reference count .
*/
newhead = MemoryContextAlloc ( TopTransactionContext , sizeof ( RegdSnapshotElt ) ) ;
newhead - > s_next = RegisteredSnapshotList ;
/* Static snapshot? Create a persistent copy */
newhead - > s_snap = snapshot - > copied ? snapshot : CopySnapshot ( snapshot ) ;
newhead - > s_level = level ;
newhead - > s_count = 1 ;
newhead - > s_snap - > regd_count + + ;
RegisteredSnapshotList = newhead ;
return RegisteredSnapshotList - > s_snap ;
}
/*
* UnregisterSnapshot
* Signals that a snapshot is no longer necessary
*
* If both reference counts fall to zero , the snapshot memory is released .
* If only the registered list refcount falls to zero , just the list element is
* freed .
*/
void
UnregisterSnapshot ( Snapshot snapshot )
{
RegdSnapshotElt * prev = NULL ;
RegdSnapshotElt * elt ;
bool found = false ;
if ( snapshot = = InvalidSnapshot )
return ;
for ( elt = RegisteredSnapshotList ; elt ! = NULL ; elt = elt - > s_next )
{
if ( elt - > s_snap = = snapshot )
{
Assert ( elt - > s_snap - > regd_count > 0 ) ;
Assert ( elt - > s_count > 0 ) ;
elt - > s_snap - > regd_count - - ;
elt - > s_count - - ;
found = true ;
if ( elt - > s_count = = 0 )
{
/* delink it from the registered snapshot list */
if ( prev )
prev - > s_next = elt - > s_next ;
else
RegisteredSnapshotList = elt - > s_next ;
/* free the snapshot itself if it's no longer relevant */
if ( elt - > s_snap - > regd_count = = 0 & & elt - > s_snap - > active_count = = 0 )
FreeSnapshot ( elt - > s_snap ) ;
/* and free the list element */
pfree ( elt ) ;
}
break ;
}
prev = elt ;
}
if ( ! found )
elog ( WARNING , " unregistering failed for snapshot %p " , snapshot ) ;
SnapshotResetXmin ( ) ;
}
/*
* SnapshotResetXmin
*
* If there are no more snapshots , we can reset our PGPROC - > xmin to InvalidXid .
* Note we can do this without locking because we assume that storing an Xid
* is atomic .
*/
static void
SnapshotResetXmin ( void )
{
if ( RegisteredSnapshotList = = NULL & & ActiveSnapshot = = NULL )
MyProc - > xmin = InvalidTransactionId ;
}
/*
* AtSubCommit_Snapshot
*/
void
FreeXactSnapshot ( void )
AtSubCommit_Snapshot ( int level )
{
ActiveSnapshotElt * active ;
RegdSnapshotElt * regd ;
/*
* We do not free the xip arrays for the static snapshot structs ; they
* will be reused soon . So this is now just a state change to prevent
* outside callers from accessing the snapshots .
* Relabel the active snapshots set in this subtransaction as though they
* are owned by the parent subxact .
*/
SerializableSnapshot = NULL ;
LatestSnapshot = NULL ;
ActiveSnapshot = NULL ; /* just for cleanliness */
for ( active = ActiveSnapshot ; active ! = NULL ; active = active - > as_next )
{
if ( active - > as_level < level )
break ;
active - > as_level = level - 1 ;
}
/*
* Reassign all registered snapshots to the parent subxact .
*
* Note : this code is somewhat bogus in that we could end up with multiple
* entries for the same snapshot and the same subxact level ( my parent ' s
* level ) . Cleaning that up is more trouble than it ' s currently worth ,
* however .
*/
for ( regd = RegisteredSnapshotList ; regd ! = NULL ; regd = regd - > s_next )
{
if ( regd - > s_level = = level )
regd - > s_level - - ;
}
}
/*
* AtSubAbort_Snapshot
* Clean up snapshots after a subtransaction abort
*/
void
AtSubAbort_Snapshot ( int level )
{
RegdSnapshotElt * prev ;
RegdSnapshotElt * regd ;
/* Forget the active snapshots set by this subtransaction */
while ( ActiveSnapshot & & ActiveSnapshot - > as_level > = level )
{
ActiveSnapshotElt * next ;
next = ActiveSnapshot - > as_next ;
/*
* Decrement the snapshot ' s active count . If it ' s still registered or
* marked as active by an outer subtransaction , we can ' t free it yet .
*/
Assert ( ActiveSnapshot - > as_snap - > active_count > = 1 ) ;
ActiveSnapshot - > as_snap - > active_count - = 1 ;
if ( ActiveSnapshot - > as_snap - > active_count = = 0 & &
ActiveSnapshot - > as_snap - > regd_count = = 0 )
FreeSnapshot ( ActiveSnapshot - > as_snap ) ;
/* and free the stack element */
pfree ( ActiveSnapshot ) ;
ActiveSnapshot = next ;
}
/* Unregister all snapshots registered during this subtransaction */
prev = NULL ;
for ( regd = RegisteredSnapshotList ; regd ! = NULL ; )
{
if ( regd - > s_level > = level )
{
RegdSnapshotElt * tofree ;
if ( prev )
prev - > s_next = regd - > s_next ;
else
RegisteredSnapshotList = regd - > s_next ;
tofree = regd ;
regd = regd - > s_next ;
tofree - > s_snap - > regd_count - = tofree - > s_count ;
/* free the snapshot if possible */
if ( tofree - > s_snap - > regd_count = = 0 & &
tofree - > s_snap - > active_count = = 0 )
FreeSnapshot ( tofree - > s_snap ) ;
/* and free the list element */
pfree ( tofree ) ;
}
else
{
prev = regd ;
regd = regd - > s_next ;
}
}
SnapshotResetXmin ( ) ;
}
/*
* AtEOXact_Snapshot
* Snapshot manager ' s cleanup function for end of transaction
*/
void
AtEOXact_Snapshot ( bool isCommit )
{
/* On commit, complain about leftover snapshots */
if ( isCommit )
{
ActiveSnapshotElt * active ;
RegdSnapshotElt * regd ;
/*
* On a serializable snapshot we must first unregister our private
* refcount to the serializable snapshot .
*/
if ( registered_serializable )
UnregisterSnapshot ( CurrentSnapshot ) ;
/* complain about unpopped active snapshots */
for ( active = ActiveSnapshot ; active ! = NULL ; active = active - > as_next )
{
ereport ( WARNING ,
( errmsg ( " snapshot %p still active " , active ) ) ) ;
}
/* complain about any unregistered snapshot */
for ( regd = RegisteredSnapshotList ; regd ! = NULL ; regd = regd - > s_next )
{
ereport ( WARNING ,
( errmsg ( " snapshot %p not destroyed at commit (%d regd refs, %d active refs) " ,
regd - > s_snap , regd - > s_snap - > regd_count ,
regd - > s_snap - > active_count ) ) ) ;
}
}
/*
* And reset our state . We don ' t need to free the memory explicitely - -
* it ' ll go away with TopTransactionContext .
*/
ActiveSnapshot = NULL ;
RegisteredSnapshotList = NULL ;
CurrentSnapshot = NULL ;
SecondarySnapshot = NULL ;
FirstSnapshotSet = false ;
registered_serializable = false ;
}