@@ -3,6 +3,38 @@
  * s_lock.c
  *	   Hardware-dependent implementation of spinlocks.
  *
+ * When waiting for a contended spinlock we loop tightly for awhile, then
+ * delay using pg_usleep() and try again.  Preferably, "awhile" should be a
+ * small multiple of the maximum time we expect a spinlock to be held.  100
+ * iterations seems about right as an initial guess.  However, on a
+ * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario
+ * it's usually better to spin a bit longer than to call the kernel, so we try
+ * to adapt the spin loop count depending on whether we seem to be in a
+ * uniprocessor or multiprocessor.
+ *
+ * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
+ * be wrong; there are platforms where that can result in a "stuck
+ * spinlock" failure.  This has been seen particularly on Alphas; it seems
+ * that the first TAS after returning from kernel space will always fail
+ * on that hardware.
+ *
+ * Once we do decide to block, we use randomly increasing pg_usleep()
+ * delays.  The first delay is 1 msec, then the delay randomly increases to
+ * about one second, after which we reset to 1 msec and start again.  The
+ * idea here is that in the presence of heavy contention we need to
+ * increase the delay, else the spinlock holder may never get to run and
+ * release the lock.  (Consider situation where spinlock holder has been
+ * nice'd down in priority by the scheduler --- it will not get scheduled
+ * until all would-be acquirers are sleeping, so if we always use a 1-msec
+ * sleep, there is a real possibility of starvation.)  But we can't just
+ * clamp the delay to an upper bound, else it would take a long time to
+ * make a reasonable number of tries.
+ *
+ * We time out and declare error after NUM_DELAYS delays (thus, exactly
+ * that many tries).  With the given settings, this will usually take 2 or
+ * so minutes.  It seems better to fix the total number of tries (and thus
+ * the probability of unintended failure) than to fix the total time
+ * spent.
+ *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
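
The "2 or so minutes" figure can be sanity-checked by simulating the delay schedule. The standalone sketch below is not part of the patch: it reimplements the backoff rule with the same constants, substitutes the portable rand()/RAND_MAX for the backend's random()/MAX_RANDOM_VALUE, and totals the sleeps instead of calling pg_usleep().

#include <stdio.h>
#include <stdlib.h>

#define NUM_DELAYS		1000
#define MIN_DELAY_USEC	1000L
#define MAX_DELAY_USEC	1000000L

int
main(void)
{
	long		total_usec = 0;
	int			cur_delay = 0;
	int			delays;

	for (delays = 0; delays < NUM_DELAYS; delays++)
	{
		if (cur_delay == 0)			/* first delay is 1 msec */
			cur_delay = MIN_DELAY_USEC;

		total_usec += cur_delay;	/* stands in for pg_usleep(cur_delay) */

		/* grow the delay by a random factor between 1X and 2X */
		cur_delay += (int) (cur_delay *
							((double) rand() / (double) RAND_MAX) + 0.5);
		/* wrap back to the minimum once the maximum is exceeded */
		if (cur_delay > MAX_DELAY_USEC)
			cur_delay = MIN_DELAY_USEC;
	}

	printf("total sleep before a stuck report: ~%ld s\n",
		   total_usec / 1000000L);
	return 0;
}

With an expected growth factor of 1.5X per delay, one climb from 1 msec to 1 second takes about seventeen delays and sleeps for roughly three seconds, so 1000 delays amount to a few minutes of accumulated sleep, consistent with the comment's estimate.
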
@@ -21,6 +53,14 @@
 #include "storage/s_lock.h"
 #include "storage/barrier.h"
 
+#define MIN_SPINS_PER_DELAY	10
+#define MAX_SPINS_PER_DELAY	1000
+#define NUM_DELAYS			1000
+#define MIN_DELAY_USEC		1000L
+#define MAX_DELAY_USEC		1000000L
+
 slock_t		dummy_spinlock;
 
 static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
@@ -30,117 +70,107 @@ static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
  * s_lock_stuck() - complain about a stuck spinlock
  */
 static void
-s_lock_stuck(volatile slock_t *lock, const char *file, int line)
+s_lock_stuck(void *p, const char *file, int line)
 {
 #if defined(S_LOCK_TEST)
 	fprintf(stderr,
 			"\nStuck spinlock (%p) detected at %s:%d.\n",
-			lock, file, line);
+			p, file, line);
 	exit(1);
 #else
 	elog(PANIC, "stuck spinlock (%p) detected at %s:%d",
-		 lock, file, line);
+		 p, file, line);
 #endif
 }
 /*
  * s_lock(lock) - platform-independent portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line)
 {
-	/*
-	 * We loop tightly for awhile, then delay using pg_usleep() and try again.
-	 * Preferably, "awhile" should be a small multiple of the maximum time we
-	 * expect a spinlock to be held.  100 iterations seems about right as an
-	 * initial guess.  However, on a uniprocessor the loop is a waste of
-	 * cycles, while in a multi-CPU scenario it's usually better to spin a bit
-	 * longer than to call the kernel, so we try to adapt the spin loop count
-	 * depending on whether we seem to be in a uniprocessor or multiprocessor.
-	 *
-	 * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
-	 * be wrong; there are platforms where that can result in a "stuck
-	 * spinlock" failure.  This has been seen particularly on Alphas; it seems
-	 * that the first TAS after returning from kernel space will always fail
-	 * on that hardware.
-	 *
-	 * Once we do decide to block, we use randomly increasing pg_usleep()
-	 * delays.  The first delay is 1 msec, then the delay randomly increases to
-	 * about one second, after which we reset to 1 msec and start again.  The
-	 * idea here is that in the presence of heavy contention we need to
-	 * increase the delay, else the spinlock holder may never get to run and
-	 * release the lock.  (Consider situation where spinlock holder has been
-	 * nice'd down in priority by the scheduler --- it will not get scheduled
-	 * until all would-be acquirers are sleeping, so if we always use a 1-msec
-	 * sleep, there is a real possibility of starvation.)  But we can't just
-	 * clamp the delay to an upper bound, else it would take a long time to
-	 * make a reasonable number of tries.
-	 *
-	 * We time out and declare error after NUM_DELAYS delays (thus, exactly
-	 * that many tries).  With the given settings, this will usually take 2 or
-	 * so minutes.  It seems better to fix the total number of tries (and thus
-	 * the probability of unintended failure) than to fix the total time
-	 * spent.
-	 */
-#define MIN_SPINS_PER_DELAY	10
-#define MAX_SPINS_PER_DELAY	1000
-#define NUM_DELAYS			1000
-#define MIN_DELAY_USEC		1000L
-#define MAX_DELAY_USEC		1000000L
-
-	int			spins = 0;
-	int			delays = 0;
-	int			cur_delay = 0;
+	SpinDelayStatus delayStatus = init_spin_delay((void *) lock);
 
 	while (TAS_SPIN(lock))
 	{
-		/* CPU-specific delay each time through the loop */
-		SPIN_DELAY();
+		perform_spin_delay(&delayStatus);
+	}
 
-		/* Block the process every spins_per_delay tries */
-		if (++spins >= spins_per_delay)
-		{
-			if (++delays > NUM_DELAYS)
-				s_lock_stuck(lock, file, line);
+	finish_spin_delay(&delayStatus);
 
-			if (cur_delay == 0)	/* first time to delay? */
-				cur_delay = MIN_DELAY_USEC;
+	return delayStatus.delays;
+}
 
-			pg_usleep(cur_delay);
+#ifdef USE_DEFAULT_S_UNLOCK
+void
+s_unlock(volatile slock_t *lock)
+{
+#ifdef TAS_ACTIVE_WORD
+	/* HP's PA-RISC */
+	*TAS_ACTIVE_WORD(lock) = -1;
+#else
+	*lock = 0;
+#endif
+}
+#endif
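
Factoring the wait loop into perform_spin_delay()/finish_spin_delay() means the same backoff and stuck-lock detection can guard state that is not an slock_t at all, for instance a lock bit maintained with an atomic fetch-or. Below is a minimal sketch of that calling pattern; the MyCounter struct, ms_state field, and MS_LOCKED flag are hypothetical, invented here for illustration.

#include "storage/s_lock.h"
#include "port/atomics.h"

#define MS_LOCKED	(1U << 31)		/* hypothetical lock bit */

typedef struct MyCounter
{
	pg_atomic_uint32 ms_state;		/* MS_LOCKED plus payload bits */
} MyCounter;

static uint32
my_counter_lock(MyCounter *mc)
{
	SpinDelayStatus delayStatus = init_spin_delay((void *) mc);
	uint32		old_state;

	for (;;)
	{
		/* try to set the lock bit; if it was clear before, we own it */
		old_state = pg_atomic_fetch_or_u32(&mc->ms_state, MS_LOCKED);
		if (!(old_state & MS_LOCKED))
			break;
		/* contended: spin a while, then start sleeping */
		perform_spin_delay(&delayStatus);
	}
	/* feed the spins_per_delay estimator, just as s_lock() does */
	finish_spin_delay(&delayStatus);

	return old_state | MS_LOCKED;
}

Because the loop retries the atomic operation itself rather than TAS_SPIN(), a stuck-lock report here points at the MyCounter address, which is why s_lock_stuck() above now takes a void * instead of a volatile slock_t *.
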
+/*
+ * Wait while spinning on a contended spinlock.
+ */
+void
+perform_spin_delay(SpinDelayStatus *status)
+{
+	/* CPU-specific delay each time through the loop */
+	SPIN_DELAY();
+
+	/* Block the process every spins_per_delay tries */
+	if (++(status->spins) >= spins_per_delay)
+	{
+		if (++(status->delays) > NUM_DELAYS)
+			s_lock_stuck(status->ptr, status->file, status->line);
+
+		if (status->cur_delay == 0)	/* first time to delay? */
+			status->cur_delay = MIN_DELAY_USEC;
+
+		pg_usleep(status->cur_delay);
 
 #if defined(S_LOCK_TEST)
-			fprintf(stdout, "*");
-			fflush(stdout);
+		fprintf(stdout, "*");
+		fflush(stdout);
 #endif
 
-			/* increase delay by a random fraction between 1X and 2X */
-			cur_delay += (int) (cur_delay *
+		/* increase delay by a random fraction between 1X and 2X */
+		status->cur_delay += (int) (status->cur_delay *
					  ((double) random() / (double) MAX_RANDOM_VALUE) + 0.5);
-			/* wrap back to minimum delay when max is exceeded */
-			if (cur_delay > MAX_DELAY_USEC)
-				cur_delay = MIN_DELAY_USEC;
+		/* wrap back to minimum delay when max is exceeded */
+		if (status->cur_delay > MAX_DELAY_USEC)
+			status->cur_delay = MIN_DELAY_USEC;
-			spins = 0;
-		}
+		status->spins = 0;
 	}
+}
-	/*
-	 * If we were able to acquire the lock without delaying, it's a good
-	 * indication we are in a multiprocessor.  If we had to delay, it's a sign
-	 * (but not a sure thing) that we are in a uniprocessor.  Hence, we
-	 * decrement spins_per_delay slowly when we had to delay, and increase it
-	 * rapidly when we didn't.  It's expected that spins_per_delay will
-	 * converge to the minimum value on a uniprocessor and to the maximum
-	 * value on a multiprocessor.
-	 *
-	 * Note: spins_per_delay is local within our current process.  We want to
-	 * average these observations across multiple backends, since it's
-	 * relatively rare for this function to even get entered, and so a single
-	 * backend might not live long enough to converge on a good value.  That
-	 * is handled by the two routines below.
-	 */
-	if (cur_delay == 0)
+/*
+ * After acquiring a spinlock, update estimates about how long to loop.
+ *
+ * If we were able to acquire the lock without delaying, it's a good
+ * indication we are in a multiprocessor.  If we had to delay, it's a sign
+ * (but not a sure thing) that we are in a uniprocessor.  Hence, we
+ * decrement spins_per_delay slowly when we had to delay, and increase it
+ * rapidly when we didn't.  It's expected that spins_per_delay will
+ * converge to the minimum value on a uniprocessor and to the maximum
+ * value on a multiprocessor.
+ *
+ * Note: spins_per_delay is local within our current process.  We want to
+ * average these observations across multiple backends, since it's
+ * relatively rare for this function to even get entered, and so a single
+ * backend might not live long enough to converge on a good value.  That
+ * is handled by the two routines below.
+ */
+void
+finish_spin_delay(SpinDelayStatus *status)
+{
+	if (status->cur_delay == 0)
 	{
 		/* we never had to delay */
 		if (spins_per_delay < MAX_SPINS_PER_DELAY)
@@ -151,22 +181,8 @@ s_lock(volatile slock_t *lock, const char *file, int line)
 		if (spins_per_delay > MIN_SPINS_PER_DELAY)
 			spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);
 	}
-	return delays;
 }
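
The rapid-increase/slow-decrease asymmetry is what lets the estimate recover in both directions. The toy model below is not from the patch: the decrease-by-1 step and the clamps match the code above, but the increase step of 100 is an assumption (that branch falls in the elided part of the hunk), and p_delay stands for the fraction of acquisitions that needed at least one sleep.

#include <stdio.h>
#include <stdlib.h>

#define MIN_SPINS_PER_DELAY 10
#define MAX_SPINS_PER_DELAY 1000

static int
converge(double p_delay)
{
	int			spd = 100;		/* stand-in for DEFAULT_SPINS_PER_DELAY */
	int			i;

	for (i = 0; i < 100000; i++)
	{
		if ((double) rand() / (double) RAND_MAX < p_delay)
		{
			/* had to delay: looks uniprocessor-ish, back off slowly */
			if (spd > MIN_SPINS_PER_DELAY)
				spd -= 1;
		}
		else
		{
			/* got the lock while spinning: ramp the spin count up fast */
			spd += 100;			/* assumed rate, see lead-in */
			if (spd > MAX_SPINS_PER_DELAY)
				spd = MAX_SPINS_PER_DELAY;
		}
	}
	return spd;
}

int
main(void)
{
	printf("almost always delayed: spins_per_delay -> %d\n", converge(0.999));
	printf("rarely delayed:        spins_per_delay -> %d\n", converge(0.10));
	return 0;
}

With p_delay near 1 the estimate settles near MIN_SPINS_PER_DELAY, while even a modest rate of delay-free acquisitions drives it to MAX_SPINS_PER_DELAY, matching the comment's expectation that the value converges to the minimum on a uniprocessor and the maximum on a multiprocessor.
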
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-#ifdef TAS_ACTIVE_WORD
-	/* HP's PA-RISC */
-	*TAS_ACTIVE_WORD(lock) = -1;
-#else
-	*lock = 0;
-#endif
-}
-#endif
-
 /*
  * Set local copy of spins_per_delay during backend startup.
  *