Restart bgworkers immediately after a crash-and-restart cycle.

Just as we would start bgworkers immediately after an initial startup
of the server, we should restart them immediately when reinitializing.

Petr Jelinek and Robert Haas
pull/6/head
Robert Haas 12 years ago
parent 364ddc3e5c
commit 970d1f76d1
  1. 34
      src/backend/postmaster/bgworker.c
  2. 7
      src/backend/postmaster/postmaster.c
  3. 1
      src/include/postmaster/bgworker_internals.h

@ -394,6 +394,27 @@ BackgroundWorkerStopNotifications(pid_t pid)
}
}
/*
 * ResetBackgroundWorkerCrashTimes
 *		Forget the recorded crash time of every registered background worker.
 *
 * Walks BackgroundWorkerList and zeroes rw_crashed_at in each entry.  The
 * intent is that, once the postmaster has been through a crash-and-restart
 * cycle, workers are started again right away rather than being held back
 * until their bgw_restart_time interval has passed.
 */
void
ResetBackgroundWorkerCrashTimes(void)
{
	slist_mutable_iter cursor;

	slist_foreach_modify(cursor, &BackgroundWorkerList)
	{
		/* Recover the enclosing list entry from its embedded list node. */
		RegisteredBgWorker *worker =
			slist_container(RegisteredBgWorker, rw_lnode, cursor.cur);

		/* Drop the crash timestamp so no restart delay is computed from it. */
		worker->rw_crashed_at = 0;
	}
}
#ifdef EXEC_BACKEND
/*
* In EXEC_BACKEND mode, workers use this to retrieve their details from
@ -478,13 +499,14 @@ bgworker_quickdie(SIGNAL_ARGS)
on_exit_reset();
/*
* Note we do exit(0) here, not exit(2) like quickdie. The reason is that
* we don't want to be seen this worker as independently crashed, because
* then postmaster would delay restarting it again afterwards. If some
* idiot DBA manually sends SIGQUIT to a random bgworker, the "dead man
* switch" will ensure that postmaster sees this as a crash.
* Note we do exit(2) not exit(0). This is to force the postmaster into a
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
* backend. This is necessary precisely because we don't clean up our
* shared memory state. (The "dead man switch" mechanism in pmsignal.c
* should ensure the postmaster sees this as a crash, too, but no harm in
* being doubly sure.)
*/
exit(0);
exit(2);
}
/*

@ -2616,7 +2616,7 @@ reaper(SIGNAL_ARGS)
if (PgStatPID == 0)
PgStatPID = pgstat_start();
/* some workers may be scheduled to start now */
/* workers may be scheduled to start now */
maybe_start_bgworker();
/* at this point we are really open for business */
@ -2860,7 +2860,6 @@ CleanupBackgroundWorker(int pid,
{
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
{
rw->rw_crashed_at = GetCurrentTimestamp();
HandleChildCrash(pid, exitstatus, namebuf);
return true;
}
@ -2871,7 +2870,6 @@ CleanupBackgroundWorker(int pid,
* Uh-oh, the child failed to clean itself up. Treat as a
* crash after all.
*/
rw->rw_crashed_at = GetCurrentTimestamp();
HandleChildCrash(pid, exitstatus, namebuf);
return true;
}
@ -3546,6 +3544,9 @@ PostmasterStateMachine(void)
ereport(LOG,
(errmsg("all server processes terminated; reinitializing")));
/* allow background workers to immediately restart */
ResetBackgroundWorkerCrashTimes();
shmem_exit(1);
reset_shared(PostPortNumber);

@ -43,6 +43,7 @@ extern void BackgroundWorkerStateChange(void);
extern void ForgetBackgroundWorker(slist_mutable_iter *cur);
extern void ReportBackgroundWorkerPID(RegisteredBgWorker *);
extern void BackgroundWorkerStopNotifications(pid_t pid);
extern void ResetBackgroundWorkerCrashTimes(void);
/* Function to start a background worker, called from postmaster.c */
extern void StartBackgroundWorker(void);

Loading…
Cancel
Save