@ -420,6 +420,7 @@ static void TerminateChildren(int signal);
# define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
static int CountChildren ( int target ) ;
static bool assign_backendlist_entry ( RegisteredBgWorker * rw ) ;
static void maybe_start_bgworker ( void ) ;
static bool CreateOptsFile ( int argc , char * argv [ ] , char * fullprogname ) ;
static pid_t StartChildProcess ( AuxProcType type ) ;
@ -5531,13 +5532,33 @@ bgworker_forkexec(int shmem_slot)
* Start a new bgworker .
* Starting time conditions must have been checked already .
*
* Returns true on success , false on failure .
* In either case , update the RegisteredBgWorker ' s state appropriately .
*
* This code is heavily based on autovacuum . c , q . v .
*/
static void
static bool
do_start_bgworker ( RegisteredBgWorker * rw )
{
pid_t worker_pid ;
Assert ( rw - > rw_pid = = 0 ) ;
/*
* Allocate and assign the Backend element . Note we must do this before
* forking , so that we can handle out of memory properly .
*
* Treat failure as though the worker had crashed . That way , the
* postmaster will wait a bit before attempting to start it again ; if it
* tried again right away , most likely it ' d find itself repeating the
* out - of - memory or fork failure condition .
*/
if ( ! assign_backendlist_entry ( rw ) )
{
rw - > rw_crashed_at = GetCurrentTimestamp ( ) ;
return false ;
}
ereport ( DEBUG1 ,
( errmsg ( " starting background worker process \" %s \" " ,
rw - > rw_worker . bgw_name ) ) ) ;
@ -5549,9 +5570,17 @@ do_start_bgworker(RegisteredBgWorker *rw)
# endif
{
case - 1 :
/* in postmaster, fork failed ... */
ereport ( LOG ,
( errmsg ( " could not fork worker process: %m " ) ) ) ;
return ;
/* undo what assign_backendlist_entry did */
ReleasePostmasterChildSlot ( rw - > rw_child_slot ) ;
rw - > rw_child_slot = 0 ;
free ( rw - > rw_backend ) ;
rw - > rw_backend = NULL ;
/* mark entry as crashed, so we'll try again later */
rw - > rw_crashed_at = GetCurrentTimestamp ( ) ;
break ;
# ifndef EXEC_BACKEND
case 0 :
@ -5575,14 +5604,24 @@ do_start_bgworker(RegisteredBgWorker *rw)
PostmasterContext = NULL ;
StartBackgroundWorker ( ) ;
exit ( 1 ) ; /* should not get here */
break ;
# endif
default :
/* in postmaster, fork successful ... */
rw - > rw_pid = worker_pid ;
rw - > rw_backend - > pid = rw - > rw_pid ;
ReportBackgroundWorkerPID ( rw ) ;
break ;
/* add new worker to lists of backends */
dlist_push_head ( & BackendList , & rw - > rw_backend - > elem ) ;
# ifdef EXEC_BACKEND
ShmemBackendArrayAdd ( rw - > rw_backend ) ;
# endif
return true ;
}
return false ;
}
/*
@ -5629,6 +5668,8 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
* Allocate the Backend struct for a connected background worker , but don ' t
* add it to the list of backends just yet .
*
* On failure , return false without changing any worker state .
*
* Some info from the Backend is copied into the passed rw .
*/
static bool
@ -5647,8 +5688,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
ereport ( LOG ,
( errcode ( ERRCODE_INTERNAL_ERROR ) ,
errmsg ( " could not generate random cancel key " ) ) ) ;
rw - > rw_crashed_at = GetCurrentTimestamp ( ) ;
return false ;
}
@ -5658,14 +5697,6 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
ereport ( LOG ,
( errcode ( ERRCODE_OUT_OF_MEMORY ) ,
errmsg ( " out of memory " ) ) ) ;
/*
* The worker didn ' t really crash , but setting this nonzero makes
* postmaster wait a bit before attempting to start it again ; if it
* tried again right away , most likely it ' d find itself under the same
* memory pressure .
*/
rw - > rw_crashed_at = GetCurrentTimestamp ( ) ;
return false ;
}
@ -5687,6 +5718,11 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
* As a side effect , the bgworker control variables are set or reset whenever
* there are more workers to start after this one , and whenever the overall
* system state requires it .
*
* The reason we start at most one worker per call is to avoid consuming the
* postmaster ' s attention for too long when many such requests are pending .
* As long as StartWorkerNeeded is true , ServerLoop will not block and will
* call this function again after dealing with any other issues .
*/
static void
maybe_start_bgworker ( void )
@ -5694,13 +5730,19 @@ maybe_start_bgworker(void)
slist_mutable_iter iter ;
TimestampTz now = 0 ;
/*
* During crash recovery , we have no need to be called until the state
* transition out of recovery .
*/
if ( FatalError )
{
StartWorkerNeeded = false ;
HaveCrashedWorker = false ;
return ; /* not yet */
return ;
}
/* Don't need to be called again unless we find a reason for it below */
StartWorkerNeeded = false ;
HaveCrashedWorker = false ;
slist_foreach_modify ( iter , & BackgroundWorkerList )
@ -5709,11 +5751,11 @@ maybe_start_bgworker(void)
rw = slist_container ( RegisteredBgWorker , rw_lnode , iter . cur ) ;
/* already running? */
/* ignore if already running */
if ( rw - > rw_pid ! = 0 )
continue ;
/* marked for death? */
/* if marked for death, clean up and remove from list */
if ( rw - > rw_terminate )
{
ForgetBackgroundWorker ( & iter ) ;
@ -5735,12 +5777,14 @@ maybe_start_bgworker(void)
continue ;
}
/* read system time only when needed */
if ( now = = 0 )
now = GetCurrentTimestamp ( ) ;
if ( ! TimestampDifferenceExceeds ( rw - > rw_crashed_at , now ,
rw - > rw_worker . bgw_restart_time * 1000 ) )
{
/* Set flag to remember that we have workers to start later */
HaveCrashedWorker = true ;
continue ;
}
@ -5748,35 +5792,35 @@ maybe_start_bgworker(void)
if ( bgworker_should_start_now ( rw - > rw_worker . bgw_start_time ) )
{
/* reset crash time before calling assign_backendlist_entry */
/* reset crash time before trying to start worker */
rw - > rw_crashed_at = 0 ;
/*
* Allocate and assign the Backend element . Note we must do this
* before forking , so that we can handle out of memory properly .
* Try to start the worker .
*
* On failure , give up processing workers for now , but set
* StartWorkerNeeded so we ' ll come back here on the next iteration
* of ServerLoop to try again . ( We don ' t want to wait , because
* there might be additional ready - to - run workers . ) We could set
* HaveCrashedWorker as well , since this worker is now marked
* crashed , but there ' s no need because the next run of this
* function will do that .
*/
if ( ! assign_backendlist_entry ( rw ) )
if ( ! do_start_bgworker ( rw ) )
{
StartWorkerNeeded = true ;
return ;
do_start_bgworker ( rw ) ; /* sets rw->rw_pid */
dlist_push_head ( & BackendList , & rw - > rw_backend - > elem ) ;
# ifdef EXEC_BACKEND
ShmemBackendArrayAdd ( rw - > rw_backend ) ;
# endif
}
/*
* Have ServerLoop call us again . Note that there might not
* actually * be * another runnable worker , but we don ' t care all
* that much ; we will find out the next time we run .
* Quit , but have ServerLoop call us again to look for additional
* ready - to - run workers . There might not be any , but we ' ll find
* out the next time we run .
*/
StartWorkerNeeded = true ;
return ;
}
}
/* no runnable worker found */
StartWorkerNeeded = false ;
}
/*