@ -15,6 +15,7 @@
# include <sys/select.h>
# endif
# include "datatype/timestamp.h"
# include "libpq-fe.h"
# include "pqexpbuffer.h"
# include "pg_getopt.h"
@ -237,54 +238,6 @@ main(int argc, char **argv)
appendPQExpBufferStr ( & wait_query ,
" ) "
" AND holder.mode = ANY (CASE waiter.mode "
" WHEN 'AccessShareLock' THEN ARRAY[ "
" 'AccessExclusiveLock'] "
" WHEN 'RowShareLock' THEN ARRAY[ "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'RowExclusiveLock' THEN ARRAY[ "
" 'ShareLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'ShareUpdateExclusiveLock' THEN ARRAY[ "
" 'ShareUpdateExclusiveLock', "
" 'ShareLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'ShareLock' THEN ARRAY[ "
" 'RowExclusiveLock', "
" 'ShareUpdateExclusiveLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'ShareRowExclusiveLock' THEN ARRAY[ "
" 'RowExclusiveLock', "
" 'ShareUpdateExclusiveLock', "
" 'ShareLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'ExclusiveLock' THEN ARRAY[ "
" 'RowShareLock', "
" 'RowExclusiveLock', "
" 'ShareUpdateExclusiveLock', "
" 'ShareLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] "
" WHEN 'AccessExclusiveLock' THEN ARRAY[ "
" 'AccessShareLock', "
" 'RowShareLock', "
" 'RowExclusiveLock', "
" 'ShareUpdateExclusiveLock', "
" 'ShareLock', "
" 'ShareRowExclusiveLock', "
" 'ExclusiveLock', "
" 'AccessExclusiveLock'] END) "
" AND holder.locktype IS NOT DISTINCT FROM waiter.locktype "
" AND holder.database IS NOT DISTINCT FROM waiter.database "
" AND holder.relation IS NOT DISTINCT FROM waiter.relation "
@ -467,34 +420,48 @@ report_error_message(Step *step)
}
/*
* As above , but reports messages possibly emitted by two steps . This is
* As above , but reports messages possibly emitted by multiple steps . This is
* useful when we have a blocked command awakened by another one ; we want to
* report both messages identically , for the case where we don ' t care which
* report all messages identically , for the case where we don ' t care which
* one fails due to a timeout such as deadlock timeout .
*/
static void
report_two _error_messages ( Step * step1 , Step * step2 )
report_multiple _error_messages ( Step * step , int nextra , Step * * extra step)
{
char * prefix ;
prefix = psprintf ( " %s %s " , step1 - > name , step2 - > name ) ;
PQExpBufferData buffer ;
int n ;
if ( step1 - > errormsg )
if ( nextra = = 0 )
{
fprintf ( stdout , " error in steps %s: %s \n " , prefix ,
step1 - > errormsg ) ;
free ( step1 - > errormsg ) ;
step1 - > errormsg = NULL ;
report_error_message ( step ) ;
return ;
}
if ( step2 - > errormsg )
initPQExpBuffer ( & buffer ) ;
appendPQExpBufferStr ( & buffer , step - > name ) ;
for ( n = 0 ; n < nextra ; + + n )
appendPQExpBuffer ( & buffer , " %s " , extrastep [ n ] - > name ) ;
if ( step - > errormsg )
{
fprintf ( stdout , " error in steps %s: %s \n " , prefix ,
step2 - > errormsg ) ;
free ( step2 - > errormsg ) ;
step2 - > errormsg = NULL ;
fprintf ( stdout , " error in steps %s: %s \n " , buffer . data ,
step - > errormsg ) ;
free ( step - > errormsg ) ;
step - > errormsg = NULL ;
}
free ( prefix ) ;
for ( n = 0 ; n < nextra ; + + n )
{
if ( extrastep [ n ] - > errormsg = = NULL )
continue ;
fprintf ( stdout , " error in steps %s: %s \n " ,
buffer . data , extrastep [ n ] - > errormsg ) ;
free ( extrastep [ n ] - > errormsg ) ;
extrastep [ n ] - > errormsg = NULL ;
}
termPQExpBuffer ( & buffer ) ;
}
/*
@ -505,7 +472,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
{
PGresult * res ;
int i ;
Step * waiting = NULL ;
int w ;
int nwaiting = 0 ;
int nerrorstep = 0 ;
Step * * waiting ;
Step * * errorstep ;
waiting = malloc ( sizeof ( Step * ) * testspec - > nsessions ) ;
errorstep = malloc ( sizeof ( Step * ) * testspec - > nsessions ) ;
/*
* In dry run mode , just display the permutation in the same format used
@ -566,54 +540,76 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
for ( i = 0 ; i < nsteps ; i + + )
{
Step * step = steps [ i ] ;
Step * oldstep = NULL ;
PGconn * conn = conns [ 1 + step - > session ] ;
bool mustwait ;
if ( waiting ! = NULL & & step - > session = = waiting - > session )
/*
* Check whether the session that needs to perform the next step
* is still blocked on an earlier step . If so , wait for it to finish .
*
* In older versions of this tool , when allowed precisely one session
* to be waiting at a time . If we reached a step which required that
* session to execute the next command , we would declare the whole
* permutation invalid , cancel everything , and move on to the next one .
* Unfortunately , that made it impossible to test the deadlock detector
* using this framework unless the numebr of processes involved in the
* deadlock was precisely two . We now assume that if we reach a step
* that is still blocked , we need to wait for it to unblock itself .
*/
for ( w = 0 ; w < nwaiting ; + + w )
{
PGcancel * cancel ;
PGresult * res ;
int j ;
/*
* This permutation is invalid : it can never happen in real life .
*
* A session is blocked on an earlier step ( waiting ) and no
* further steps from this session can run until it is unblocked ,
* but it can only be unblocked by running steps from other
* sessions .
*/
fprintf ( stderr , " invalid permutation detected \n " ) ;
/* Cancel the waiting statement from this session. */
cancel = PQgetCancel ( conn ) ;
if ( cancel ! = NULL )
if ( step - > session = = waiting [ w ] - > session )
{
char buf [ 256 ] ;
if ( ! PQcancel ( cancel , buf , sizeof ( buf ) ) )
fprintf ( stderr , " PQcancel failed: %s \n " , buf ) ;
/* Be sure to consume the error message. */
while ( ( res = PQgetResult ( conn ) ) ! = NULL )
PQclear ( res ) ;
PQfreeCancel ( cancel ) ;
oldstep = waiting [ w ] ;
break ;
}
}
if ( oldstep ! = NULL )
{
/* Wait for previous step on this connection. */
try_complete_step ( oldstep , STEP_RETRY ) ;
/*
* Now we really have to complete all the running transactions to
* make sure teardown doesn ' t block .
* Attempt to complete any steps that were previously waiting .
* Remove any that have completed , and also remove the one for
* which we just waited .
*/
for ( j = 1 ; j < nconns ; j + + )
w = 0 ;
nerrorstep = 0 ;
while ( w < nwaiting )
{
res = PQexec ( conns [ j ] , " ROLLBACK " ) ;
if ( res ! = NULL )
PQclear ( res ) ;
if ( waiting [ w ] = = oldstep )
{
/* We just waited for this; remove it. */
if ( w + 1 < nwaiting )
memcpy ( & waiting [ w ] , & waiting [ w + 1 ] ,
( nwaiting - ( w + 1 ) ) * sizeof ( Step * ) ) ;
nwaiting - - ;
continue ;
}
if ( try_complete_step ( waiting [ w ] , STEP_NONBLOCK | STEP_RETRY ) )
{
/* Still blocked on a lock, leave it alone. */
w + + ;
}
else
{
/* This one finished, too! */
errorstep [ nerrorstep + + ] = waiting [ w ] ;
if ( w + 1 < nwaiting )
memcpy ( & waiting [ w ] , & waiting [ w + 1 ] ,
( nwaiting - ( w + 1 ) ) * sizeof ( Step * ) ) ;
nwaiting - - ;
}
}
goto teardown ;
/* Report all errors together. */
report_multiple_error_messages ( oldstep , nerrorstep , errorstep ) ;
}
/* Send the query for this step. */
if ( ! PQsendQuery ( conn , step - > sql ) )
{
fprintf ( stdout , " failed to send query for step %s: %s \n " ,
@ -621,39 +617,41 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
exit_nicely ( ) ;
}
if ( waiting ! = NULL )
{
/* Some other step is already waiting: just block. */
try_complete_step ( step , 0 ) ;
/* Try to complete this step without blocking. */
mustwait = try_complete_step ( step , STEP_NONBLOCK ) ;
/*
* See if this step unblocked the waiting step ; report both error
* messages together if so .
*/
if ( ! try_complete_step ( waiting , STEP_NONBLOCK | STEP_RETRY ) )
/* Attempt to complete any steps that were previously waiting. */
w = 0 ;
nerrorstep = 0 ;
while ( w < nwaiting )
{
if ( try_complete_step ( waiting [ w ] , STEP_NONBLOCK | STEP_RETRY ) )
w + + ;
else
{
report_two_error_messages ( step , waiting ) ;
waiting = NULL ;
errorstep [ nerrorstep + + ] = waiting [ w ] ;
if ( w + 1 < nwaiting )
memcpy ( & waiting [ w ] , & waiting [ w + 1 ] ,
( nwaiting - ( w + 1 ) ) * sizeof ( Step * ) ) ;
nwaiting - - ;
}
else
report_error_message ( step ) ;
}
else
{
if ( try_complete_step ( step , STEP_NONBLOCK ) )
waiting = step ;
report_error_message ( step ) ;
}
/* Report any error from this step, and any steps that it unblocked. */
report_multiple_error_messages ( step , nerrorstep , errorstep ) ;
/* If this step is waiting, add it to the array of waiters. */
if ( mustwait )
waiting [ nwaiting + + ] = step ;
}
/* Finish any waiting query. */
if ( waiting ! = NULL )
for ( w = 0 ; w < nwaiting ; + + w )
{
try_complete_step ( waiting , STEP_RETRY ) ;
report_error_message ( waiting ) ;
try_complete_step ( waiting [ w ] , STEP_RETRY ) ;
report_error_message ( waiting [ w ] ) ;
}
teardown :
/* Perform per-session teardown */
for ( i = 0 ; i < testspec - > nsessions ; i + + )
{
@ -714,14 +712,17 @@ try_complete_step(Step *step, int flags)
{
PGconn * conn = conns [ 1 + step - > session ] ;
fd_set read_set ;
struct timeval start_time ;
struct timeval timeout ;
int sock = PQsocket ( conn ) ;
int ret ;
PGresult * res ;
bool canceled = false ;
gettimeofday ( & start_time , NULL ) ;
FD_ZERO ( & read_set ) ;
while ( ( flags & STEP_NONBLOCK ) & & PQisBusy ( conn ) )
while ( PQisBusy ( conn ) )
{
FD_SET ( sock , & read_set ) ;
timeout . tv_sec = 0 ;
@ -737,28 +738,75 @@ try_complete_step(Step *step, int flags)
}
else if ( ret = = 0 ) /* select() timeout: check for lock wait */
{
int ntuples ;
struct timeval current_time ;
int64 td ;
res = PQexecPrepared ( conns [ 0 ] , PREP_WAITING , 1 ,
& backend_pids [ step - > session + 1 ] ,
NULL , NULL , 0 ) ;
if ( PQresultStatus ( res ) ! = PGRES_TUPLES_OK )
/* If it's OK for the step to block, check whether it has. */
if ( flags & STEP_NONBLOCK )
{
fprintf ( stderr , " lock wait query failed: %s " ,
PQerrorMessage ( conn ) ) ;
exit_nicely ( ) ;
int ntuples ;
res = PQexecPrepared ( conns [ 0 ] , PREP_WAITING , 1 ,
& backend_pids [ step - > session + 1 ] ,
NULL , NULL , 0 ) ;
if ( PQresultStatus ( res ) ! = PGRES_TUPLES_OK )
{
fprintf ( stderr , " lock wait query failed: %s " ,
PQerrorMessage ( conn ) ) ;
exit_nicely ( ) ;
}
ntuples = PQntuples ( res ) ;
PQclear ( res ) ;
if ( ntuples > = 1 ) /* waiting to acquire a lock */
{
if ( ! ( flags & STEP_RETRY ) )
printf ( " step %s: %s <waiting ...> \n " ,
step - > name , step - > sql ) ;
return true ;
}
/* else, not waiting */
}
ntuples = PQntuples ( res ) ;
PQclear ( res ) ;
if ( ntuples > = 1 ) /* waiting to acquire a lock */
/* Figure out how long we've been waiting for this step. */
gettimeofday ( & current_time , NULL ) ;
td = ( int64 ) current_time . tv_sec - ( int64 ) start_time . tv_sec ;
td * = USECS_PER_SEC ;
td + = ( int64 ) current_time . tv_usec - ( int64 ) start_time . tv_usec ;
/*
* After 60 seconds , try to cancel the query .
*
* If the user tries to test an invalid permutation , we don ' t
* want to hang forever , especially when this is running in the
* buildfarm . So try to cancel it after a minute . This will
* presumably lead to this permutation failing , but remaining
* permutations and tests should still be OK .
*/
if ( td > 60 * USECS_PER_SEC & & ! canceled )
{
if ( ! ( flags & STEP_RETRY ) )
printf ( " step %s: %s <waiting ...> \n " ,
step - > name , step - > sql ) ;
return true ;
PGcancel * cancel = PQgetCancel ( conn ) ;
char buf [ 256 ] ;
if ( cancel ! = NULL & & ! PQcancel ( cancel , buf , sizeof ( buf ) ) )
fprintf ( stderr , " PQcancel failed: %s \n " , buf ) ;
if ( cancel ! = NULL )
PQfreeCancel ( cancel ) ;
}
/*
* After 75 seconds , just give up and die .
*
* Since cleanup steps won ' t be run in this case , this may cause
* later tests to fail . That stinks , but it ' s better than waiting
* forever for the server to respond to the cancel .
*/
if ( td > 75 * USECS_PER_SEC )
{
fprintf ( stderr , " step %s timed out after 75 seconds \n " ,
step - > name ) ;
exit_nicely ( ) ;
}
/* else, not waiting: give it more time */
}
else if ( ! PQconsumeInput ( conn ) ) /* select(): data available */
{