@ -65,7 +65,6 @@
# include "storage/spin.h"
# include "utils/builtins.h"
# include "utils/memutils.h"
# include "utils/old_snapshot.h"
# include "utils/rel.h"
# include "utils/resowner_private.h"
# include "utils/snapmgr.h"
@ -73,14 +72,6 @@
# include "utils/timestamp.h"
/*
* GUC parameters
*/
/*
* old_snapshot_threshold is expressed in minutes ; -1 disables the feature .
* Code in this file treats 0 as a special setting used for testing .
*/
int old_snapshot_threshold ; /* number of minutes, -1 disables */
/*
* Pointer to the OldSnapshotControlData structure ; it is assigned in
* SnapMgrInit ( ) from the result of ShmemInitStruct ( ) .
*/
volatile OldSnapshotControlData * oldSnapshotControl ;
/*
* CurrentSnapshot points to the only snapshot taken in transaction - snapshot
* mode , and to the latest one taken in a read - committed transaction .
@ -170,7 +161,6 @@ typedef struct ExportedSnapshot
static List * exportedSnapshots = NIL ;
/* Prototypes for local functions */
/* Rounds a timestamp up to a minute boundary ; defined later in this file . */
static TimestampTz AlignTimestampToMinuteBoundary ( TimestampTz ts ) ;
static Snapshot CopySnapshot ( Snapshot snapshot ) ;
static void FreeSnapshot ( Snapshot snapshot ) ;
static void SnapshotResetXmin ( void ) ;
@ -194,50 +184,6 @@ typedef struct SerializedSnapshotData
XLogRecPtr lsn ;
} SerializedSnapshotData ;
/*
 * SnapMgrShmemSize
 *		Compute the number of bytes requested for OldSnapshotControlData.
 *
 * The fixed part is everything in the struct up to the xid_by_minute array.
 * Room for OLD_SNAPSHOT_TIME_MAP_ENTRIES TransactionIds is added only when
 * old_snapshot_threshold is greater than zero.
 */
Size
SnapMgrShmemSize(void)
{
	Size		fixed_part = offsetof(OldSnapshotControlData, xid_by_minute);
	Size		map_part;

	/* No time-to-xid map is allocated for a threshold of zero or below. */
	if (old_snapshot_threshold <= 0)
		return fixed_part;

	map_part = mul_size(sizeof(TransactionId), OLD_SNAPSHOT_TIME_MAP_ENTRIES);

	return add_size(fixed_part, map_part);
}
/*
* Initialize for managing old snapshot detection .
*/
void
SnapMgrInit ( void )
{
bool found ;
/*
* Create or attach to the OldSnapshotControlData structure .
*/
oldSnapshotControl = ( volatile OldSnapshotControlData * )
ShmemInitStruct ( " OldSnapshotControlData " ,
SnapMgrShmemSize ( ) , & found ) ;
if ( ! found )
{
SpinLockInit ( & oldSnapshotControl - > mutex_current ) ;
oldSnapshotControl - > current_timestamp = 0 ;
SpinLockInit ( & oldSnapshotControl - > mutex_latest_xmin ) ;
oldSnapshotControl - > latest_xmin = InvalidTransactionId ;
oldSnapshotControl - > next_map_update = 0 ;
SpinLockInit ( & oldSnapshotControl - > mutex_threshold ) ;
oldSnapshotControl - > threshold_timestamp = 0 ;
oldSnapshotControl - > threshold_xid = InvalidTransactionId ;
oldSnapshotControl - > head_offset = 0 ;
oldSnapshotControl - > head_timestamp = 0 ;
oldSnapshotControl - > count_used = 0 ;
}
}
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction .
@ -1656,420 +1602,6 @@ HaveRegisteredOrActiveSnapshot(void)
}
/*
 * Return a timestamp that is exactly on a minute boundary.
 *
 * If the argument is already aligned, return that value, otherwise move to
 * the next minute boundary following the given time.
 *
 * C's % operator truncates toward zero, so the remainder has the sign of
 * "ts".  Handling the sign explicitly keeps the contract above true for
 * negative timestamps as well, and avoids adding USECS_PER_MINUTE - 1 to a
 * value that needs no adjustment.
 */
static TimestampTz
AlignTimestampToMinuteBoundary(TimestampTz ts)
{
	TimestampTz remainder = ts % USECS_PER_MINUTE;

	/* Already on a boundary. */
	if (remainder == 0)
		return ts;

	/*
	 * For a negative remainder, dropping it moves toward zero, which is the
	 * next (later) boundary.
	 */
	if (remainder < 0)
		return ts - remainder;

	/* Positive remainder: step forward to the following boundary. */
	return ts + (USECS_PER_MINUTE - remainder);
}
/*
 * GetSnapshotCurrentTimestamp
 *		Current time for snapshot purposes, guaranteed not to go backward.
 *
 * Starts from GetCurrentTimestamp() and compares it, under mutex_current,
 * with the value remembered in shared state.  Whichever is later is both
 * returned and left stored.
 */
TimestampTz
GetSnapshotCurrentTimestamp(void)
{
	TimestampTz result = GetCurrentTimestamp();
	TimestampTz last_seen;

	SpinLockAcquire(&oldSnapshotControl->mutex_current);

	last_seen = oldSnapshotControl->current_timestamp;
	if (result > last_seen)
	{
		/* Time advanced; remember the new high-water mark. */
		oldSnapshotControl->current_timestamp = result;
	}
	else
	{
		/* Time did not advance; hand back the remembered value. */
		result = last_seen;
	}

	SpinLockRelease(&oldSnapshotControl->mutex_current);

	return result;
}
/*
 * GetOldSnapshotThresholdTimestamp
 *		Fetch the last stored threshold_timestamp, i.e. the timestamp
 *		through which vacuum may have processed.
 *
 * XXX: The read is done under mutex_threshold because we never trust that
 * a 64-bit value can be read atomically; if that ever changes, the spinlock
 * could be dropped here.
 */
TimestampTz
GetOldSnapshotThresholdTimestamp(void)
{
	TimestampTz stored_threshold;

	SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
	stored_threshold = oldSnapshotControl->threshold_timestamp;
	SpinLockRelease(&oldSnapshotControl->mutex_threshold);

	return stored_threshold;
}
/*
 * SetOldSnapshotThresholdTimestamp
 *		Store a new threshold timestamp together with its xid limit.
 *
 * Both fields are written while holding mutex_threshold.  The Asserts check
 * that neither the timestamp nor the xid moves backward relative to what is
 * currently stored.
 */
void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
	SpinLockAcquire(&oldSnapshotControl->mutex_threshold);

	/* Sanity checks on the values being replaced. */
	Assert(oldSnapshotControl->threshold_timestamp <= ts);
	Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid,
										 xlimit));

	oldSnapshotControl->threshold_timestamp = ts;
	oldSnapshotControl->threshold_xid = xlimit;

	SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
/*
* XXX : Magic to keep old_snapshot_threshold tests appear " working " . They
* currently are broken , and discussion of what to do about them is
* ongoing . See
* https : //www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
*/
void
SnapshotTooOldMagicForTest ( void )
{
TimestampTz ts = GetSnapshotCurrentTimestamp ( ) ;
Assert ( old_snapshot_threshold = = 0 ) ;
ts - = 5 * USECS_PER_SEC ;
SpinLockAcquire ( & oldSnapshotControl - > mutex_threshold ) ;
oldSnapshotControl - > threshold_timestamp = ts ;
SpinLockRelease ( & oldSnapshotControl - > mutex_threshold ) ;
}
/*
 * GetOldSnapshotFromTimeMapping
 *		Look up the xid limit recorded for a minute-aligned timestamp.
 *
 * If the map is non-empty and "ts" is not older than the head entry, store
 * the matching xid in *xlimitp and return true.  A timestamp newer than the
 * last entry uses the last entry.  Otherwise return false and leave
 * *xlimitp untouched.
 *
 * The map is read under OldSnapshotTimeMapLock in shared mode.
 */
static bool
GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
{
	bool		found = false;

	Assert(ts == AlignTimestampToMinuteBoundary(ts));

	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);

	if (oldSnapshotControl->count_used > 0 &&
		ts >= oldSnapshotControl->head_timestamp)
	{
		int			slot;

		/* Minutes between the head entry and the requested time. */
		slot = ((ts - oldSnapshotControl->head_timestamp)
				/ USECS_PER_MINUTE);

		/* Clamp to the newest entry currently in use. */
		if (slot > oldSnapshotControl->count_used - 1)
			slot = oldSnapshotControl->count_used - 1;

		/* Translate the logical position into a circular-buffer index. */
		slot = (oldSnapshotControl->head_offset + slot)
			% OLD_SNAPSHOT_TIME_MAP_ENTRIES;

		*xlimitp = oldSnapshotControl->xid_by_minute[slot];
		found = true;
	}

	LWLockRelease(OldSnapshotTimeMapLock);

	return found;
}
/*
 * TransactionIdLimitedForOldSnapshots
 *
 * Apply old snapshot limit.  This is intended to be called for page pruning
 * and table vacuuming, to allow old_snapshot_threshold to override the normal
 * global xmin value.  Actual testing for snapshot too old will be based on
 * whether a snapshot timestamp is prior to the threshold timestamp set in
 * this function.
 *
 * If the limited horizon allows a cleanup action that otherwise would not be
 * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
 * be called before that cleanup action.
 *
 * Returns true, with *limit_xid and *limit_ts filled in, when a valid limit
 * that is not older than recentXmin was determined.  Returns false, leaving
 * both outputs untouched, otherwise.
 */
bool
TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
									Relation relation,
									TransactionId *limit_xid,
									TimestampTz *limit_ts)
{
	TimestampTz ts;
	TransactionId xlimit = recentXmin;
	TransactionId latest_xmin;
	TimestampTz next_map_update_ts;

	Assert(TransactionIdIsNormal(recentXmin));
	Assert(OldSnapshotThresholdActive());
	Assert(limit_ts != NULL && limit_xid != NULL);

	/*
	 * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
	 * can't prune early when skipping WAL.
	 */
	if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
		return false;

	ts = GetSnapshotCurrentTimestamp();

	SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
	latest_xmin = oldSnapshotControl->latest_xmin;
	next_map_update_ts = oldSnapshotControl->next_map_update;
	SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);

	/*
	 * Zero threshold always overrides to latest xmin, if valid.  Without some
	 * heuristic it will find its own snapshot too old on, for example, a
	 * simple UPDATE -- which would make it useless for most testing, but
	 * there is no principled way to ensure that it doesn't fail in this way.
	 * Use a five-second delay to try to get useful testing behavior, but this
	 * may need adjustment.
	 */
	if (old_snapshot_threshold == 0)
	{
		if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
			&& TransactionIdFollows(latest_xmin, xlimit))
			xlimit = latest_xmin;

		ts -= 5 * USECS_PER_SEC;
	}
	else
	{
		/*
		 * threshold_timestamp must be a TimestampTz: holding it in a
		 * narrower type would truncate the stored value and make the
		 * equality test below meaningless.
		 */
		TimestampTz threshold_timestamp;
		TransactionId threshold_xid;

		ts = AlignTimestampToMinuteBoundary(ts)
			- (old_snapshot_threshold * USECS_PER_MINUTE);

		/* Check for fast exit without LW locking. */
		SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
		threshold_timestamp = oldSnapshotControl->threshold_timestamp;
		threshold_xid = oldSnapshotControl->threshold_xid;
		SpinLockRelease(&oldSnapshotControl->mutex_threshold);

		if (ts == threshold_timestamp)
		{
			/*
			 * Current timestamp is in same bucket as the last limit that was
			 * applied.  Reuse.
			 */
			xlimit = threshold_xid;
		}
		else if (ts == next_map_update_ts)
		{
			/*
			 * FIXME: This branch is super iffy - but that should probably be
			 * fixed separately.
			 */
			xlimit = latest_xmin;
		}
		else
		{
			/*
			 * Consult the time-to-xid map.  When there is no mapping for
			 * "ts", xlimit simply keeps its initial value (recentXmin), so
			 * the result is deliberately ignored.
			 */
			(void) GetOldSnapshotFromTimeMapping(ts, &xlimit);
		}

		/*
		 * Failsafe protection against vacuuming work of active transaction.
		 *
		 * This is not an assertion because we avoid the spinlock for
		 * performance, leaving open the possibility that xlimit could advance
		 * and be more current; but it seems prudent to apply this limit.  It
		 * might make pruning a tiny bit less aggressive than it could be, but
		 * protects against data loss bugs.
		 */
		if (TransactionIdIsNormal(latest_xmin)
			&& TransactionIdPrecedes(latest_xmin, xlimit))
			xlimit = latest_xmin;
	}

	if (TransactionIdIsValid(xlimit) &&
		TransactionIdFollowsOrEquals(xlimit, recentXmin))
	{
		*limit_ts = ts;
		*limit_xid = xlimit;

		return true;
	}

	return false;
}
/*
 * MaintainOldSnapshotTimeMapping
 *		Take care of the circular buffer that maps time to xid.
 *
 * "whenTaken" is rounded up to a minute boundary to select a bucket, and
 * "xmin" is the value to record for it.  The shared latest_xmin and
 * next_map_update fields are updated first, under mutex_latest_xmin; the map
 * itself is only touched (under OldSnapshotTimeMapLock, exclusive) when a
 * bucket boundary has been crossed and old_snapshot_threshold is not 0.
 *
 * Must not be called when old snapshot checking is disabled.
 */
void
MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
{
	TimestampTz ts;
	TransactionId latest_xmin;
	TimestampTz update_ts;
	bool		map_update_required = false;

	/* Never call this function when old snapshot checking is disabled. */
	Assert(old_snapshot_threshold >= 0);

	ts = AlignTimestampToMinuteBoundary(whenTaken);

	/*
	 * Keep track of the latest xmin seen by any process.  Update mapping with
	 * a new value when we have crossed a bucket boundary.
	 */
	SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
	latest_xmin = oldSnapshotControl->latest_xmin;
	update_ts = oldSnapshotControl->next_map_update;
	if (ts > update_ts)
	{
		oldSnapshotControl->next_map_update = ts;
		map_update_required = true;
	}
	if (TransactionIdFollows(xmin, latest_xmin))
		oldSnapshotControl->latest_xmin = xmin;
	SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);

	/* We only needed to update the most recent xmin value. */
	if (!map_update_required)
		return;

	/* No further tracking needed for 0 (used for testing). */
	if (old_snapshot_threshold == 0)
		return;

	/*
	 * We don't want to do something stupid with unusual values, but we don't
	 * want to litter the log with warnings or break otherwise normal
	 * processing for this feature; so if something seems unreasonable, just
	 * log at DEBUG level and return without doing anything.
	 *
	 * The timestamp is 64 bits wide, so it is printed through long long;
	 * plain long may be only 32 bits and would truncate it.
	 */
	if (whenTaken < 0)
	{
		elog(DEBUG1,
			 " MaintainOldSnapshotTimeMapping called with negative whenTaken = %lld ",
			 (long long) whenTaken);
		return;
	}
	if (!TransactionIdIsNormal(xmin))
	{
		elog(DEBUG1,
			 " MaintainOldSnapshotTimeMapping called with xmin = %lu ",
			 (unsigned long) xmin);
		return;
	}

	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);

	Assert(oldSnapshotControl->head_offset >= 0);
	Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
	Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
	Assert(oldSnapshotControl->count_used >= 0);
	Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);

	if (oldSnapshotControl->count_used == 0)
	{
		/* set up first entry for empty mapping */
		oldSnapshotControl->head_offset = 0;
		oldSnapshotControl->head_timestamp = ts;
		oldSnapshotControl->count_used = 1;
		oldSnapshotControl->xid_by_minute[0] = xmin;
	}
	else if (ts < oldSnapshotControl->head_timestamp)
	{
		/* old ts; log it at DEBUG */
		LWLockRelease(OldSnapshotTimeMapLock);
		elog(DEBUG1,
			 " MaintainOldSnapshotTimeMapping called with old whenTaken = %lld ",
			 (long long) whenTaken);
		return;
	}
	else if (ts <= (oldSnapshotControl->head_timestamp +
					((oldSnapshotControl->count_used - 1)
					 * USECS_PER_MINUTE)))
	{
		/* existing mapping; advance xid if possible */
		int			bucket = (oldSnapshotControl->head_offset
							  + ((ts - oldSnapshotControl->head_timestamp)
								 / USECS_PER_MINUTE))
			% OLD_SNAPSHOT_TIME_MAP_ENTRIES;

		if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
			oldSnapshotControl->xid_by_minute[bucket] = xmin;
	}
	else
	{
		/* We need a new bucket, but it might not be the very next one. */
		int			distance_to_new_tail;
		int			distance_to_current_tail;
		int			advance;

		/*
		 * Our goal is for the new "tail" of the mapping, that is, the entry
		 * which is newest and thus furthest from the "head" entry, to
		 * correspond to "ts".  Since there's one entry per minute, the
		 * distance between the current head and the new tail is just the
		 * number of minutes of difference between ts and the current
		 * head_timestamp.
		 *
		 * The distance from the current head to the current tail is one less
		 * than the number of entries in the mapping, because the entry at the
		 * head_offset is for 0 minutes after head_timestamp.
		 *
		 * The difference between these two values is the number of minutes by
		 * which we need to advance the mapping, either adding new entries or
		 * rotating old ones out.
		 */
		distance_to_new_tail =
			(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
		distance_to_current_tail =
			oldSnapshotControl->count_used - 1;
		advance = distance_to_new_tail - distance_to_current_tail;
		Assert(advance > 0);

		if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
		{
			/* Advance is so far that all old data is junk; start over. */
			oldSnapshotControl->head_offset = 0;
			oldSnapshotControl->count_used = 1;
			oldSnapshotControl->xid_by_minute[0] = xmin;
			oldSnapshotControl->head_timestamp = ts;
		}
		else
		{
			/* Store the new value in one or more buckets. */
			int			i;

			for (i = 0; i < advance; i++)
			{
				if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
				{
					/* Map full and new value replaces old head. */
					int			old_head = oldSnapshotControl->head_offset;

					/* Step the head forward, wrapping at the array end. */
					oldSnapshotControl->head_offset =
						(old_head + 1) % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
					oldSnapshotControl->xid_by_minute[old_head] = xmin;
					oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
				}
				else
				{
					/* Extend map to unused entry. */
					int			new_tail = (oldSnapshotControl->head_offset
											+ oldSnapshotControl->count_used)
						% OLD_SNAPSHOT_TIME_MAP_ENTRIES;

					oldSnapshotControl->count_used++;
					oldSnapshotControl->xid_by_minute[new_tail] = xmin;
				}
			}
		}
	}

	LWLockRelease(OldSnapshotTimeMapLock);
}
/*
* Setup a snapshot that replaces normal catalog snapshots that allows catalog
* access to behave just like it did at a certain point in the past .