Skip to content

Commit a0c446f

Browse files
committed
Enable health checks on all Kubernetes clusters by default
Implements a per-resource health check config approach, which eases the adoption of new health checks while avoiding a migration of the backend database. Changes: - Added a `default_kube` health check config, which enables health checks on all Kubernetes clusters - Revised the initialization and insert logic for health check configs. Part of #58413
1 parent dcb8fff commit a0c446f

File tree

5 files changed

+135
-36
lines changed

5 files changed

+135
-36
lines changed

constants.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -775,9 +775,16 @@ const (
775775
var PresetRoles = []string{PresetEditorRoleName, PresetAccessRoleName, PresetAuditorRoleName}
776776

777777
const (
778-
// PresetDefaultHealthCheckConfigName is the name of a preset
779-
// default health_check_config that enables health checks for all resources.
780-
PresetDefaultHealthCheckConfigName = "default"
778+
// PresetDefaultHealthCheckConfigDBName is the name of a preset
779+
// health_check_config that enables health checks for all
780+
// database resources. For historical reasons, this preset is named
781+
// "default" even though it applies only to databases.
782+
PresetDefaultHealthCheckConfigDBName = "default"
783+
784+
// PresetDefaultHealthCheckConfigKubeName is the name of a preset
785+
// health_check_config that enables health checks for all
786+
// Kubernetes resources.
787+
PresetDefaultHealthCheckConfigKubeName = "default_kube"
781788
)
782789

783790
const (

lib/auth/export_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,8 @@ func CreatePresetRoles(ctx context.Context, um PresetRoleManager) error {
293293
return createPresetRoles(ctx, um)
294294
}
295295

296-
func CreatePresetHealthCheckConfig(ctx context.Context, svc services.HealthCheckConfig) error {
297-
return createPresetHealthCheckConfig(ctx, svc)
296+
func CreatePresetHealthCheckConfigs(ctx context.Context, svc services.HealthCheckConfig) error {
297+
return createPresetHealthCheckConfigs(ctx, svc)
298298
}
299299

300300
func GetPresetUsers() []types.User {

lib/auth/init.go

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import (
4444
"github.com/gravitational/teleport/api/client/proto"
4545
autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
4646
clusterconfigpb "github.com/gravitational/teleport/api/gen/proto/go/teleport/clusterconfig/v1"
47+
healthcheckconfigv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/healthcheckconfig/v1"
4748
machineidv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1"
4849
"github.com/gravitational/teleport/api/types"
4950
"github.com/gravitational/teleport/api/types/clusterconfig"
@@ -64,6 +65,7 @@ import (
6465
"github.com/gravitational/teleport/lib/backend"
6566
"github.com/gravitational/teleport/lib/cryptosuites"
6667
"github.com/gravitational/teleport/lib/events"
68+
"github.com/gravitational/teleport/lib/itertools/stream"
6769
"github.com/gravitational/teleport/lib/modules"
6870
"github.com/gravitational/teleport/lib/service/servicecfg"
6971
"github.com/gravitational/teleport/lib/services"
@@ -680,11 +682,11 @@ func initCluster(ctx context.Context, cfg InitConfig, asrv *Server) error {
680682
}
681683
span.AddEvent("completed creating database object import rules")
682684

683-
span.AddEvent("creating preset health check config")
684-
if err := createPresetHealthCheckConfig(ctx, asrv); err != nil {
685+
span.AddEvent("creating preset health check configs")
686+
if err := createPresetHealthCheckConfigs(ctx, asrv); err != nil {
685687
return trace.Wrap(err)
686688
}
687-
span.AddEvent("completed creating preset health check config")
689+
span.AddEvent("completed creating preset health check configs")
688690
} else {
689691
asrv.logger.InfoContext(ctx, "skipping preset role and user creation")
690692
}
@@ -1486,23 +1488,40 @@ func createPresetDatabaseObjectImportRule(ctx context.Context, rules services.Da
14861488
return nil
14871489
}
14881490

1489-
// createPresetHealthCheckConfig creates a default preset health check config
1490-
// resource that enables health checks on all resources.
1491-
func createPresetHealthCheckConfig(ctx context.Context, svc services.HealthCheckConfig) error {
1492-
page, _, err := svc.ListHealthCheckConfigs(ctx, 0, "")
1491+
// NewPresetHealthFunc is a function that creates a new HealthCheckConfig preset.
1492+
type NewPresetHealthFunc func() *healthcheckconfigv1.HealthCheckConfig
1493+
1494+
// newHealthPresets maps preset health check config names to the functions that create them.
1495+
var newHealthPresets = map[string]NewPresetHealthFunc{
1496+
teleport.PresetDefaultHealthCheckConfigDBName: services.NewPresetHealthCheckConfigDB,
1497+
teleport.PresetDefaultHealthCheckConfigKubeName: services.NewPresetHealthCheckConfigKube,
1498+
}
1499+
1500+
// createPresetHealthCheckConfigs creates a preset health check config
1501+
// for each supported resource type, skipping any preset that already exists.
1502+
func createPresetHealthCheckConfigs(ctx context.Context, svc services.HealthCheckConfig) error {
1503+
// The choice to create a preset per-resource is motivated by:
1504+
// - Supporting existing Teleport clusters already using health checks with some resources
1505+
// - Avoiding migration of the backend database, which avoids downtime and headaches
1506+
// - Easing the adoption of health checks for new resources as they are developed over time
1507+
exists := make(map[string]bool)
1508+
cfgs, err := stream.Collect(clientutils.Resources(ctx, svc.ListHealthCheckConfigs))
14931509
if err != nil {
1494-
return trace.Wrap(err, "failed listing available health check configs")
1510+
return trace.Wrap(err, "unable to list health check configs")
14951511
}
1496-
if len(page) > 0 {
1497-
return nil
1512+
for _, cfg := range cfgs {
1513+
exists[cfg.GetMetadata().GetName()] = true
14981514
}
1499-
preset := services.NewPresetHealthCheckConfig()
1500-
_, err = svc.CreateHealthCheckConfig(ctx, preset)
1501-
if err != nil && !trace.IsAlreadyExists(err) {
1502-
return trace.Wrap(err,
1503-
"failed creating preset health_check_config %s",
1504-
preset.GetMetadata().GetName(),
1505-
)
1515+
var errs []error
1516+
for name, newPreset := range newHealthPresets {
1517+
if !exists[name] {
1518+
if _, err = svc.CreateHealthCheckConfig(ctx, newPreset()); err != nil && !trace.IsAlreadyExists(err) {
1519+
errs = append(errs, err)
1520+
}
1521+
}
1522+
}
1523+
if len(errs) > 0 {
1524+
return trace.NewAggregate(errs...)
15061525
}
15071526
return nil
15081527
}

lib/auth/init_test.go

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ func TestBadIdentity(t *testing.T) {
141141

142142
// bad cert type
143143
_, err = state.ReadSSHIdentityFromKeyPair(priv, pub)
144-
require.IsType(t, trace.BadParameter(""), err)
144+
require.ErrorIs(t, trace.BadParameter("failed to parse server certificate: not an SSH certificate"), err)
145145

146146
// missing authority domain
147147
cert, err := a.GenerateHostCert(sshca.HostCertificateRequest{
@@ -158,7 +158,7 @@ func TestBadIdentity(t *testing.T) {
158158
require.NoError(t, err)
159159

160160
_, err = state.ReadSSHIdentityFromKeyPair(priv, cert)
161-
require.IsType(t, trace.BadParameter(""), err)
161+
require.ErrorIs(t, trace.BadParameter("missing cert extension x-teleport-authority"), err)
162162

163163
// missing host uuid
164164
cert, err = a.GenerateHostCert(sshca.HostCertificateRequest{
@@ -175,7 +175,7 @@ func TestBadIdentity(t *testing.T) {
175175
require.NoError(t, err)
176176

177177
_, err = state.ReadSSHIdentityFromKeyPair(priv, cert)
178-
require.IsType(t, trace.BadParameter(""), err)
178+
require.ErrorIs(t, trace.BadParameter("missing cert extension x-teleport-authority"), err)
179179

180180
// unrecognized role
181181
cert, err = a.GenerateHostCert(sshca.HostCertificateRequest{
@@ -192,7 +192,7 @@ func TestBadIdentity(t *testing.T) {
192192
require.NoError(t, err)
193193

194194
_, err = state.ReadSSHIdentityFromKeyPair(priv, cert)
195-
require.IsType(t, trace.BadParameter(""), err)
195+
require.ErrorIs(t, trace.BadParameter("invalid role \"bad role\""), err)
196196
}
197197

198198
func TestSignatureAlgorithmSuite(t *testing.T) {
@@ -969,14 +969,14 @@ func TestPresets(t *testing.T) {
969969
err := auth.CreatePresetRoles(ctx, as)
970970
require.NoError(t, err)
971971

972-
err = auth.CreatePresetHealthCheckConfig(ctx, as)
972+
err = auth.CreatePresetHealthCheckConfigs(ctx, as)
973973
require.NoError(t, err)
974974

975975
// Second call should not fail
976976
err = auth.CreatePresetRoles(ctx, as)
977977
require.NoError(t, err)
978978

979-
err = auth.CreatePresetHealthCheckConfig(ctx, as)
979+
err = auth.CreatePresetHealthCheckConfigs(ctx, as)
980980
require.NoError(t, err)
981981

982982
// Presets were created
@@ -985,7 +985,7 @@ func TestPresets(t *testing.T) {
985985
require.NoError(t, err)
986986
}
987987

988-
cfg, err := as.GetHealthCheckConfig(ctx, teleport.PresetDefaultHealthCheckConfigName)
988+
cfg, err := as.GetHealthCheckConfig(ctx, teleport.PresetDefaultHealthCheckConfigDBName)
989989
require.NoError(t, err)
990990
require.NotNil(t, cfg)
991991
})
@@ -1021,12 +1021,12 @@ func TestPresets(t *testing.T) {
10211021
as.SetClock(clock)
10221022

10231023
// an existing health check config should not be modified by init
1024-
cfg := services.NewPresetHealthCheckConfig()
1024+
cfg := services.NewPresetHealthCheckConfigDB()
10251025
cfg.Spec.Interval = durationpb.New(42 * time.Second)
10261026
cfg, err := as.CreateHealthCheckConfig(ctx, cfg)
10271027
require.NoError(t, err)
10281028

1029-
err = auth.CreatePresetHealthCheckConfig(ctx, as)
1029+
err = auth.CreatePresetHealthCheckConfigs(ctx, as)
10301030
require.NoError(t, err)
10311031

10321032
// Preset was created. Ensure it didn't overwrite the existing config
@@ -1035,6 +1035,53 @@ func TestPresets(t *testing.T) {
10351035
require.Equal(t, cfg.Spec.Interval.AsDuration(), got.Spec.Interval.AsDuration())
10361036
})
10371037

1038+
t.Run("AddAllHealthCheckConfigs", func(t *testing.T) {
1039+
as := newTestAuthServer(ctx, t)
1040+
clock := clockwork.NewFakeClock()
1041+
as.SetClock(clock)
1042+
1043+
// Create all health check presets.
1044+
err := auth.CreatePresetHealthCheckConfigs(ctx, as)
1045+
require.NoError(t, err)
1046+
1047+
// Check that all presets were created.
1048+
db, err := as.GetHealthCheckConfig(ctx, teleport.PresetDefaultHealthCheckConfigDBName)
1049+
require.NoError(t, err)
1050+
require.NotNil(t, db)
1051+
require.Equal(t,
1052+
teleport.PresetDefaultHealthCheckConfigDBName,
1053+
db.GetMetadata().GetName())
1054+
kube, err := as.GetHealthCheckConfig(ctx, teleport.PresetDefaultHealthCheckConfigKubeName)
1055+
require.NoError(t, err)
1056+
require.NotNil(t, kube)
1057+
require.Equal(t,
1058+
teleport.PresetDefaultHealthCheckConfigKubeName,
1059+
kube.GetMetadata().GetName())
1060+
})
1061+
1062+
t.Run("AddKubeHealthCheckConfig", func(t *testing.T) {
1063+
as := newTestAuthServer(ctx, t)
1064+
clock := clockwork.NewFakeClock()
1065+
as.SetClock(clock)
1066+
1067+
// Simulate an existing cluster with db health checks.
1068+
db, err := as.CreateHealthCheckConfig(ctx, services.NewPresetHealthCheckConfigDB())
1069+
require.NoError(t, err)
1070+
require.NotNil(t, db)
1071+
1072+
// Attempt to create all health check presets.
1073+
err = auth.CreatePresetHealthCheckConfigs(ctx, as)
1074+
require.NoError(t, err)
1075+
1076+
// Check that the kube preset was created.
1077+
kube, err := as.GetHealthCheckConfig(ctx, teleport.PresetDefaultHealthCheckConfigKubeName)
1078+
require.NoError(t, err)
1079+
require.NotNil(t, kube)
1080+
require.Equal(t,
1081+
teleport.PresetDefaultHealthCheckConfigKubeName,
1082+
kube.GetMetadata().GetName())
1083+
})
1084+
10381085
// If a default allow condition is not present, ensure it gets added.
10391086
t.Run("AddDefaultAllowConditions", func(t *testing.T) {
10401087
as := newTestAuthServer(ctx, t)

lib/services/presets.go

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -855,15 +855,15 @@ func NewPresetMCPUserRole() types.Role {
855855
return role
856856
}
857857

858-
// NewPresetHealthCheckConfig returns a preset default health_check_config that
859-
// enables health checks for all resources.
860-
func NewPresetHealthCheckConfig() *healthcheckconfigv1.HealthCheckConfig {
858+
// NewPresetHealthCheckConfigDB returns a preset health_check_config
859+
// enabling health checks for all database resources.
860+
func NewPresetHealthCheckConfigDB() *healthcheckconfigv1.HealthCheckConfig {
861861
return &healthcheckconfigv1.HealthCheckConfig{
862862
Kind: types.KindHealthCheckConfig,
863863
Version: types.V1,
864864
Metadata: &headerv1.Metadata{
865-
Name: teleport.PresetDefaultHealthCheckConfigName,
866-
Description: "Enables all health checks by default",
865+
Name: teleport.PresetDefaultHealthCheckConfigDBName,
866+
Description: "Enables health checks for all databases by default",
867867
Namespace: apidefaults.Namespace,
868868
Labels: map[string]string{
869869
types.TeleportInternalResourceType: types.PresetResource,
@@ -881,6 +881,32 @@ func NewPresetHealthCheckConfig() *healthcheckconfigv1.HealthCheckConfig {
881881
}
882882
}
883883

884+
// NewPresetHealthCheckConfigKube returns a preset health_check_config
885+
// enabling health checks for all Kubernetes resources.
886+
func NewPresetHealthCheckConfigKube() *healthcheckconfigv1.HealthCheckConfig {
887+
return &healthcheckconfigv1.HealthCheckConfig{
888+
Kind: types.KindHealthCheckConfig,
889+
Version: types.V1,
890+
Metadata: &headerv1.Metadata{
891+
Name: teleport.PresetDefaultHealthCheckConfigKubeName,
892+
Description: "Enables health checks for all Kubernetes clusters by default",
893+
Namespace: apidefaults.Namespace,
894+
Labels: map[string]string{
895+
types.TeleportInternalResourceType: types.PresetResource,
896+
},
897+
},
898+
Spec: &healthcheckconfigv1.HealthCheckConfigSpec{
899+
Match: &healthcheckconfigv1.Matcher{
900+
// match all kubernetes clusters
901+
KubernetesLabels: []*labelv1.Label{{
902+
Name: types.Wildcard,
903+
Values: []string{types.Wildcard},
904+
}},
905+
},
906+
},
907+
}
908+
}
909+
884910
// bootstrapRoleMetadataLabels are metadata labels that will be applied to each role.
885911
// These are intended to add labels for older roles that didn't previously have them.
886912
func bootstrapRoleMetadataLabels() map[string]map[string]string {

0 commit comments

Comments
 (0)