Skip to content

Commit 3f9e104

Browse files
authored
Merge pull request #4427 from coderbirju/automate_health_checks_feat
add healthcheck orchestration logic
2 parents 8d75a0a + 7216405 commit 3f9e104

27 files changed

+903
-69
lines changed

cmd/nerdctl/compose/compose_start.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,10 @@ import (
2828
"github.com/containerd/errdefs"
2929

3030
"github.com/containerd/nerdctl/v2/cmd/nerdctl/helpers"
31+
"github.com/containerd/nerdctl/v2/pkg/api/types"
3132
"github.com/containerd/nerdctl/v2/pkg/clientutil"
3233
"github.com/containerd/nerdctl/v2/pkg/cmd/compose"
34+
"github.com/containerd/nerdctl/v2/pkg/config"
3335
"github.com/containerd/nerdctl/v2/pkg/containerutil"
3436
"github.com/containerd/nerdctl/v2/pkg/labels"
3537
)
@@ -86,15 +88,15 @@ func startAction(cmd *cobra.Command, args []string) error {
8688
return fmt.Errorf("service %q has no container to start", svcName)
8789
}
8890

89-
if err := startContainers(ctx, client, containers); err != nil {
91+
if err := startContainers(ctx, client, containers, &globalOptions); err != nil {
9092
return err
9193
}
9294
}
9395

9496
return nil
9597
}
9698

97-
func startContainers(ctx context.Context, client *containerd.Client, containers []containerd.Container) error {
99+
func startContainers(ctx context.Context, client *containerd.Client, containers []containerd.Container, globalOptions *types.GlobalCommandOptions) error {
98100
eg, ctx := errgroup.WithContext(ctx)
99101
for _, c := range containers {
100102
c := c
@@ -112,7 +114,7 @@ func startContainers(ctx context.Context, client *containerd.Client, containers
112114
}
113115

114116
// in compose, always disable attach
115-
if err := containerutil.Start(ctx, c, false, false, client, ""); err != nil {
117+
if err := containerutil.Start(ctx, c, false, false, client, "", (*config.Config)(globalOptions)); err != nil {
116118
return err
117119
}
118120
info, err := c.Info(ctx, containerd.WithoutRefreshedMetadata)

cmd/nerdctl/container/container_create.go

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -279,10 +279,6 @@ func createOptions(cmd *cobra.Command) (types.ContainerCreateOptions, error) {
279279
if err != nil {
280280
return opt, err
281281
}
282-
opt.HealthStartInterval, err = cmd.Flags().GetDuration("health-start-interval")
283-
if err != nil {
284-
return opt, err
285-
}
286282
opt.NoHealthcheck, err = cmd.Flags().GetBool("no-healthcheck")
287283
if err != nil {
288284
return opt, err

cmd/nerdctl/container/container_health_check_test.go renamed to cmd/nerdctl/container/container_health_check_linux_test.go

Lines changed: 318 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/containerd/nerdctl/mod/tigron/tig"
3333

3434
"github.com/containerd/nerdctl/v2/pkg/healthcheck"
35+
"github.com/containerd/nerdctl/v2/pkg/rootlessutil"
3536
"github.com/containerd/nerdctl/v2/pkg/testutil"
3637
"github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest"
3738
)
@@ -42,6 +43,11 @@ func TestContainerHealthCheckBasic(t *testing.T) {
4243
// Docker CLI does not provide a standalone healthcheck command.
4344
testCase.Require = require.Not(nerdtest.Docker)
4445

46+
// Skip systemd tests in rootless environment to bypass dbus permission issues
47+
if rootlessutil.IsRootless() {
48+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
49+
}
50+
4551
testCase.SubTests = []*test.Case{
4652
{
4753
Description: "Container does not exist",
@@ -139,6 +145,11 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
139145
// Docker CLI does not provide a standalone healthcheck command.
140146
testCase.Require = require.Not(nerdtest.Docker)
141147

148+
// Skip systemd tests in rootless environment to bypass dbus permission issues
149+
if rootlessutil.IsRootless() {
150+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
151+
}
152+
142153
testCase.SubTests = []*test.Case{
143154
{
144155
Description: "Health check timeout scenario",
@@ -602,3 +613,310 @@ func TestContainerHealthCheckAdvance(t *testing.T) {
602613

603614
testCase.Run(t)
604615
}
616+
617+
func TestHealthCheck_SystemdIntegration_Basic(t *testing.T) {
618+
testCase := nerdtest.Setup()
619+
testCase.Require = require.Not(nerdtest.Docker)
620+
// Skip systemd tests in rootless environment to bypass dbus permission issues
621+
if rootlessutil.IsRootless() {
622+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
623+
}
624+
625+
testCase.SubTests = []*test.Case{
626+
{
627+
Description: "Basic healthy container with systemd-triggered healthcheck",
628+
Setup: func(data test.Data, helpers test.Helpers) {
629+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
630+
"--health-cmd", "echo healthy",
631+
"--health-interval", "2s",
632+
testutil.CommonImage, "sleep", "30")
633+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
634+
},
635+
Cleanup: func(data test.Data, helpers test.Helpers) {
636+
// Ensure proper cleanup of systemd units
637+
helpers.Anyhow("stop", data.Identifier())
638+
helpers.Anyhow("rm", "-f", data.Identifier())
639+
},
640+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
641+
return &test.Expected{
642+
ExitCode: 0,
643+
Output: expect.All(func(stdout string, t tig.T) {
644+
var h *healthcheck.Health
645+
646+
// Poll up to 5 times for health status
647+
maxAttempts := 5
648+
var finalStatus string
649+
650+
for i := 0; i < maxAttempts; i++ {
651+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
652+
h = inspect.State.Health
653+
654+
assert.Assert(t, h != nil, "expected health state to be present")
655+
finalStatus = h.Status
656+
657+
// If healthy, break and pass the test
658+
if finalStatus == "healthy" {
659+
t.Log(fmt.Sprintf("Container became healthy on attempt %d/%d", i+1, maxAttempts))
660+
break
661+
}
662+
663+
// If unhealthy, fail immediately
664+
if finalStatus == "unhealthy" {
665+
assert.Assert(t, false, fmt.Sprintf("Container became unhealthy on attempt %d/%d, status: %s", i+1, maxAttempts, finalStatus))
666+
return
667+
}
668+
669+
// If not the last attempt, wait before retrying
670+
if i < maxAttempts-1 {
671+
t.Log(fmt.Sprintf("Attempt %d/%d: status is '%s', waiting 1 second before retry", i+1, maxAttempts, finalStatus))
672+
time.Sleep(1 * time.Second)
673+
}
674+
}
675+
676+
if finalStatus != "healthy" {
677+
assert.Assert(t, false, fmt.Sprintf("Container did not become healthy after %d attempts, final status: %s", maxAttempts, finalStatus))
678+
return
679+
}
680+
681+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
682+
}),
683+
}
684+
},
685+
},
686+
{
687+
Description: "Kill stops healthcheck execution and cleans up systemd timer",
688+
Setup: func(data test.Data, helpers test.Helpers) {
689+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
690+
"--health-cmd", "echo healthy",
691+
"--health-interval", "1s",
692+
testutil.CommonImage, "sleep", "30")
693+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
694+
helpers.Ensure("kill", data.Identifier())
695+
},
696+
Cleanup: func(data test.Data, helpers test.Helpers) {
697+
// Container is already killed, just remove it
698+
helpers.Anyhow("rm", "-f", data.Identifier())
699+
},
700+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
701+
return &test.Expected{
702+
ExitCode: expect.ExitCodeNoCheck,
703+
Output: func(stdout string, t tig.T) {
704+
// Get container info for verification
705+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
706+
containerID := inspect.ID
707+
h := inspect.State.Health
708+
709+
// Verify health state and logs exist
710+
assert.Assert(t, h != nil, "expected health state to be present")
711+
assert.Assert(t, len(h.Log) > 0, "expected at least one health check log entry")
712+
713+
// Ensure systemd timers are removed
714+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
715+
result.Run(&test.Expected{
716+
ExitCode: expect.ExitCodeNoCheck,
717+
Output: func(stdout string, _ tig.T) {
718+
assert.Assert(t, !strings.Contains(stdout, containerID),
719+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
720+
},
721+
})
722+
},
723+
}
724+
},
725+
},
726+
{
727+
Description: "Remove cleans up systemd timer",
728+
Setup: func(data test.Data, helpers test.Helpers) {
729+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
730+
"--health-cmd", "echo healthy",
731+
"--health-interval", "1s",
732+
testutil.CommonImage, "sleep", "30")
733+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
734+
helpers.Ensure("rm", "-f", data.Identifier())
735+
},
736+
Cleanup: func(data test.Data, helpers test.Helpers) {
737+
// Container is already removed in Setup; the rm -f below is only a best-effort safeguard
738+
helpers.Anyhow("rm", "-f", data.Identifier())
739+
},
740+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
741+
return &test.Expected{
742+
ExitCode: expect.ExitCodeNoCheck,
743+
Output: func(stdout string, t tig.T) {
744+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
745+
containerID := inspect.ID
746+
747+
// Check systemd timers to ensure cleanup
748+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
749+
result.Run(&test.Expected{
750+
ExitCode: expect.ExitCodeNoCheck,
751+
Output: func(stdout string, _ tig.T) {
752+
// Verify systemd timer has been cleaned up by checking systemctl output
753+
// We check that no timer listed by systemctl contains the container ID
754+
assert.Assert(t, !strings.Contains(stdout, containerID),
755+
"expected nerdctl healthcheck timer for container ID %s to be removed after container removal", containerID)
756+
},
757+
})
758+
},
759+
}
760+
},
761+
},
762+
{
763+
Description: "Stop cleans up systemd timer",
764+
Setup: func(data test.Data, helpers test.Helpers) {
765+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
766+
"--health-cmd", "echo healthy",
767+
"--health-interval", "1s",
768+
testutil.CommonImage, "sleep", "30")
769+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
770+
helpers.Ensure("stop", data.Identifier())
771+
},
772+
Cleanup: func(data test.Data, helpers test.Helpers) {
773+
// Container is already stopped, just remove it
774+
helpers.Anyhow("rm", "-f", data.Identifier())
775+
},
776+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
777+
return &test.Expected{
778+
ExitCode: expect.ExitCodeNoCheck,
779+
Output: func(stdout string, t tig.T) {
780+
// Get container info for verification
781+
inspect := nerdtest.InspectContainer(helpers, data.Identifier())
782+
containerID := inspect.ID
783+
784+
// Ensure systemd timers are removed
785+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
786+
result.Run(&test.Expected{
787+
ExitCode: expect.ExitCodeNoCheck,
788+
Output: func(stdout string, _ tig.T) {
789+
assert.Assert(t, !strings.Contains(stdout, containerID),
790+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
791+
},
792+
})
793+
},
794+
}
795+
},
796+
},
797+
}
798+
testCase.Run(t)
799+
}
800+
801+
func TestHealthCheck_SystemdIntegration_Advanced(t *testing.T) {
802+
testCase := nerdtest.Setup()
803+
testCase.Require = require.Not(nerdtest.Docker)
804+
// Skip systemd tests in rootless environment to bypass dbus permission issues
805+
if rootlessutil.IsRootless() {
806+
t.Skip("systemd healthcheck tests are skipped in rootless environment")
807+
}
808+
809+
testCase.SubTests = []*test.Case{
810+
{
811+
// Tests that CreateTimer() successfully creates systemd timer units and
812+
// RemoveTransientHealthCheckFiles() properly cleans up units when container stops.
813+
Description: "Systemd timer unit creation and cleanup",
814+
Setup: func(data test.Data, helpers test.Helpers) {
815+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
816+
"--health-cmd", "echo healthy",
817+
"--health-interval", "1s",
818+
testutil.CommonImage, "sleep", "30")
819+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
820+
},
821+
Cleanup: func(data test.Data, helpers test.Helpers) {
822+
helpers.Anyhow("rm", "-f", data.Identifier())
823+
},
824+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
825+
return helpers.Command("inspect", data.Identifier())
826+
},
827+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
828+
return &test.Expected{
829+
ExitCode: 0,
830+
Output: expect.All(func(stdout string, t tig.T) {
831+
// Get container ID and check systemd timer
832+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
833+
containerID := containerInspect.ID
834+
835+
// Check systemd timer
836+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
837+
result.Run(&test.Expected{
838+
ExitCode: expect.ExitCodeNoCheck,
839+
Output: func(stdout string, _ tig.T) {
840+
// Verify that a timer exists for this specific container
841+
assert.Assert(t, strings.Contains(stdout, containerID),
842+
"expected to find nerdctl healthcheck timer containing container ID: %s", containerID)
843+
},
844+
})
845+
// Stop container and verify cleanup
846+
helpers.Ensure("stop", data.Identifier())
847+
848+
// Check that timer is gone
849+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
850+
result.Run(&test.Expected{
851+
ExitCode: expect.ExitCodeNoCheck,
852+
Output: func(stdout string, _ tig.T) {
853+
assert.Assert(t, !strings.Contains(stdout, containerID),
854+
"expected nerdctl healthcheck timer for container ID %s to be removed after container stop", containerID)
855+
},
856+
})
857+
}),
858+
}
859+
},
860+
},
861+
{
862+
Description: "Container restart recreates systemd timer",
863+
Setup: func(data test.Data, helpers test.Helpers) {
864+
helpers.Ensure("run", "-d", "--name", data.Identifier(),
865+
"--health-cmd", "echo restart-test",
866+
"--health-interval", "2s",
867+
testutil.CommonImage, "sleep", "60")
868+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
869+
},
870+
Cleanup: func(data test.Data, helpers test.Helpers) {
871+
helpers.Anyhow("rm", "-f", data.Identifier())
872+
},
873+
Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
874+
// Get container ID for verification
875+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
876+
containerID := containerInspect.ID
877+
878+
// Step 1: Verify timer exists initially
879+
result := helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
880+
result.Run(&test.Expected{
881+
ExitCode: expect.ExitCodeNoCheck,
882+
Output: func(stdout string, t tig.T) {
883+
assert.Assert(t, strings.Contains(stdout, containerID),
884+
"expected timer for container %s to exist initially", containerID)
885+
},
886+
})
887+
888+
// Step 2: Stop container
889+
helpers.Ensure("stop", data.Identifier())
890+
891+
// Step 3: Verify timer is removed after stop
892+
result = helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
893+
result.Run(&test.Expected{
894+
ExitCode: expect.ExitCodeNoCheck,
895+
Output: func(stdout string, t tig.T) {
896+
assert.Assert(t, !strings.Contains(stdout, containerID),
897+
"expected timer for container %s to be removed after stop", containerID)
898+
},
899+
})
900+
901+
// Step 4: Restart container
902+
helpers.Ensure("start", data.Identifier())
903+
nerdtest.EnsureContainerStarted(helpers, data.Identifier())
904+
905+
// Step 5: Verify timer is recreated after restart - this is our final verification
906+
return helpers.Custom("systemctl", "list-timers", "--all", "--no-pager")
907+
},
908+
Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
909+
return &test.Expected{
910+
ExitCode: expect.ExitCodeNoCheck,
911+
Output: func(stdout string, t tig.T) {
912+
containerInspect := nerdtest.InspectContainer(helpers, data.Identifier())
913+
containerID := containerInspect.ID
914+
assert.Assert(t, strings.Contains(stdout, containerID),
915+
"expected timer for container %s to be recreated after restart", containerID)
916+
},
917+
}
918+
},
919+
},
920+
}
921+
testCase.Run(t)
922+
}

0 commit comments

Comments
 (0)