Skip to content

Commit

Permalink
Silent late healthcheck requests
Browse files Browse the repository at this point in the history
Previously, when healthchecks were stopped after a deploy complete, it
was possible for one or more additional healthcheck results to be
reported. This can happen for two reasons:

- If the check is slower than the interval, it was possible that another
  check would be scheduled even when the shutdown channel is closed.
  This was due to the `select` choosing a path at random, in the case
  that the shutdown condition and the timer condition were both ready.

- If the shutdown happens while a check is in progress, the check would
  still eventually complete and report its result, even though we no
  longer want it.

To avoid this we do two things:

- Before starting a check, additionally ensure the shutdown channel is
  not also closed. If it is, exit the loop instead.
- When a check completes, only report the result if we're not shut down.
  • Loading branch information
kevinmcconnell committed Nov 26, 2024
1 parent c7ad33d commit 0e15170
Showing 1 changed file with 37 additions and 11 deletions.
48 changes: 37 additions & 11 deletions internal/server/health_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package server

import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"net/url"
Expand All @@ -12,6 +14,11 @@ const (
healthCheckUserAgent = "kamal-proxy"
)

var (
ErrorHealthCheckRequestTimedOut = errors.New("Request timed out")
ErrorHealthCheckUnexpectedStatus = errors.New("Unexpected status")
)

type HealthCheckConsumer interface {
HealthCheckCompleted(success bool)
}
Expand Down Expand Up @@ -53,11 +60,16 @@ func (hc *HealthCheck) run() {

for {
select {
case <-ticker.C:
hc.check()

case <-hc.shutdown:
return
case <-ticker.C:
select {
case <-hc.shutdown: // Prioritize shutdown over check
return
default:
hc.check()
}

}
}
}
Expand All @@ -68,27 +80,41 @@ func (hc *HealthCheck) check() {

req, err := http.NewRequestWithContext(ctx, http.MethodGet, hc.endpoint.String(), nil)
if err != nil {
slog.Error("Unable to create healthcheck request", "error", err)
hc.consumer.HealthCheckCompleted(false)
hc.reportResult(false, err)
return
}

req.Header.Set("User-Agent", healthCheckUserAgent)

resp, err := http.DefaultClient.Do(req)
if err != nil {
slog.Info("Healthcheck failed", "error", err)
hc.consumer.HealthCheckCompleted(false)
if errors.Is(err, context.DeadlineExceeded) {
err = ErrorHealthCheckRequestTimedOut
}
hc.reportResult(false, err)
return
}
defer resp.Body.Close()

if resp.StatusCode < 200 || resp.StatusCode > 299 {
slog.Info("Healthcheck failed", "status", resp.StatusCode)
hc.consumer.HealthCheckCompleted(false)
hc.reportResult(false, fmt.Errorf("%w (%d)", ErrorHealthCheckUnexpectedStatus, resp.StatusCode))
return
}

slog.Info("Healthcheck succeeded", "status", resp.StatusCode)
hc.consumer.HealthCheckCompleted(true)
hc.reportResult(true, nil)
}

func (hc *HealthCheck) reportResult(success bool, err error) {
select {
case <-hc.shutdown:
return // Ignore late results after close
default:
if success {
slog.Info("Healthcheck succeeded")
} else {
slog.Info("Healthcheck failed", "error", err)
}

hc.consumer.HealthCheckCompleted(success)
}
}

0 comments on commit 0e15170

Please sign in to comment.