Skip to content

Commit

Permalink
master.c: Process babysat daemons before regular daemons
Browse files Browse the repository at this point in the history
The "wait 10 seconds then retry" logic for daemons that are failing
frequently appears never to have worked. The reason is that we'd respawn
children for *any* service that was missing children before checking babysat
services.

Reverse the order of the checks: first check whether a service is babysat and
needs a child, and only then fall through to the general respawning of children.

This does mean that after 10+ seconds, we'll only spawn one child at first.
Later we'll come along and spawn more.
  • Loading branch information
wolfsage committed Feb 19, 2025
1 parent a0e65a5 commit 31be7fb
Showing 1 changed file with 22 additions and 22 deletions.
44 changes: 22 additions & 22 deletions master/master.c
Original file line number Diff line number Diff line change
Expand Up @@ -2839,24 +2839,7 @@ static void reread_conf(struct timeval now)

static void check_undermanned(struct service *s, int si, int wdi)
{
if (s->exec /* enabled */ &&
(s->nactive < s->max_workers) &&
(s->ready_workers < s->desired_workers))
{
/* bring us up to desired_workers */
int j = s->desired_workers - s->ready_workers;

if (verbose) {
syslog(LOG_DEBUG, "service %s/%s needs %d more ready workers",
s->name, s->familyname, j);
}

while (j-- > 0) {
spawn_service(s, si, wdi);
}
} else if (s->exec
&& s->babysit
&& s->nactive == 0) {
if (s->exec && s->babysit && s->nactive == 0) {
if (s->nreadyfails >= MAX_READY_FAILS) {
// if not yet timed out, just wait
time_t now = time(NULL);
Expand All @@ -2869,11 +2852,28 @@ static void check_undermanned(struct service *s, int si, int wdi)
s->nreadyfails--;
s->lastreadyfail = now;
}
syslog(LOG_ERR,
"lost all children for service: %s/%s. " \
"Applying babysitter.",
s->name, s->familyname);
if (s->lastreadyfail) {
syslog(LOG_ERR,
"lost all children for service: %s/%s. " \
"Applying babysitter.",
s->name, s->familyname);
}
spawn_service(s, si, wdi);
} else if (s->exec /* enabled */
&& (s->nactive < s->max_workers)
&& (s->ready_workers < s->desired_workers))
{
/* bring us up to desired_workers */
int j = s->desired_workers - s->ready_workers;

if (verbose) {
syslog(LOG_DEBUG, "service %s/%s needs %d more ready workers",
s->name, s->familyname, j);
}

while (j-- > 0) {
spawn_service(s, si, wdi);
}
} else if (!s->exec /* disabled */ &&
s->name /* not yet removed */ &&
s->nactive == 0) {
Expand Down

0 comments on commit 31be7fb

Please sign in to comment.