diff --git a/cmd/kubesolo/main.go b/cmd/kubesolo/main.go
index d4b6fea2..d3ad6a0d 100644
--- a/cmd/kubesolo/main.go
+++ b/cmd/kubesolo/main.go
@@ -96,6 +96,13 @@ func main() {
 		os.Exit(0)
 	}
 
+	if *flags.StartupTimeout > 0 {
+		// Round up to whole poll intervals so that a timeout shorter than one
+		// interval still yields at least one retry instead of truncating to 0.
+		interval := int(types.DefaultComponentSleep.Seconds())
+		types.DefaultRetryCount = (*flags.StartupTimeout + interval - 1) / interval
+	}
+
 	service, err := service()
 	if err != nil {
 		log.Fatal().Err(err).Msg("failed to create service. check the logs for more information. exiting...")
diff --git a/internal/config/flags/flags.go b/internal/config/flags/flags.go
index 56799425..8a61cbf1 100644
--- a/internal/config/flags/flags.go
+++ b/internal/config/flags/flags.go
@@ -28,4 +28,5 @@ var (
 	Full           = Application.Flag("full", "Disable memory-saving overrides and use upstream Kubernetes defaults. Kubesolo still uses NodeSetter in favour of the scheduler. Recommended for CI and developer environments where memory is not constrained. Leave unset for edge deployments.").Envar("KUBESOLO_FULL").Default("false").Bool()
 	DBWALRepair    = Application.Flag("db-wal-repair", "On startup, run an integrity check against the SQLite database and remove WAL artefacts (state.db-wal, state.db-shm) if corruption is detected. Recovers from unclean shutdowns caused by power loss. Defaults to false.").Envar("KUBESOLO_DB_WAL_REPAIR").Default("false").Bool()
 	DisableIPv6    = Application.Flag("disable-ipv6", "Disable IPv6 support. When set, CoreDNS will not serve ip6.arpa reverse zones and kubelet will register with an explicit IPv4 node address. Defaults to false.").Envar("KUBESOLO_DISABLE_IPV6").Default("false").Bool()
+	StartupTimeout = Application.Flag("startup-timeout", "Maximum time in seconds to wait for each component to pass its health check during startup. Increase on slow storage such as SD cards. Defaults to 600.").Envar("KUBESOLO_STARTUP_TIMEOUT").Default("600").Int()
 )
diff --git a/types/const.go b/types/const.go
index f9c7f027..c7646f17 100644
--- a/types/const.go
+++ b/types/const.go
@@ -27,14 +27,14 @@ const (
 	DefaultKineSocket                = "kine.sock"
 	DefaultControllerManagerDir      = "controller-manager"
 	DefaultSandboxImage              = "docker.io/portainer/pause:latest"
-	DefaultPortainerAgentImage       = "docker.io/portainer/agent:2.39.1"
-	DefaultCoreDNSImage              = "docker.io/coredns/coredns:1.14.1"
-	DefaultLocalPathProvisionerImage = "docker.io/rancher/local-path-provisioner:v0.0.34"
+	DefaultPortainerAgentImage       = "docker.io/portainer/agent:2.39.2"
+	DefaultCoreDNSImage              = "docker.io/coredns/coredns:1.14.3"
+	DefaultLocalPathProvisionerImage = "docker.io/rancher/local-path-provisioner:v0.0.36"
 	DefaultLocalPathStorageDir       = "local-path-storage"
 	DefaultWebhookReadWriteTimeout   = 10 * time.Second
 	DefaultWebhookIdleTimeout        = 30 * time.Second
 	DefaultContextTimeout            = 15 * time.Second
 	DefaultComponentSleep            = 5 * time.Second
-	DefaultRetryCount                = 5
+	DefaultStartupTimeout            = 600 // seconds
 	DefaultNftMasqTable              = "kubesolo-masq"
 )
diff --git a/types/var.go b/types/var.go
index 8289adbe..2600ada3 100644
--- a/types/var.go
+++ b/types/var.go
@@ -6,4 +6,9 @@ var (
 	KubesoloKineDir              = filepath.Join(DefaultKineDir, "db")
 	KubesoloControllerManagerDir = filepath.Join(DefaultControllerManagerDir, "config")
 	KubesoloWebhookDir           = filepath.Join(DefaultPKIDir, "webhook")
+
+	// DefaultRetryCount is the number of health-check retries per component.
+	// It starts at DefaultStartupTimeout / DefaultComponentSleep and is
+	// overridden at startup from --startup-timeout, rounded up to whole intervals.
+	DefaultRetryCount = DefaultStartupTimeout / int(DefaultComponentSleep.Seconds())
 )