Skip to content

Commit

Permalink
added ReplicaReadyStatus metric, added unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Edwinhr716 committed Apr 16, 2024
1 parent d7f8907 commit c901648
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 1 deletion.
14 changes: 13 additions & 1 deletion pkg/controllers/leaderworkerset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"strconv"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand All @@ -41,6 +42,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

leaderworkerset "sigs.k8s.io/lws/api/leaderworkerset/v1"
"sigs.k8s.io/lws/pkg/metrics"
"sigs.k8s.io/lws/pkg/utils"
podutils "sigs.k8s.io/lws/pkg/utils/pod"
statefulsetutils "sigs.k8s.io/lws/pkg/utils/statefulset"
Expand Down Expand Up @@ -367,7 +369,8 @@ func (r *LeaderWorkerSetReconciler) updateConditions(ctx context.Context, lws *l
}
if podutils.PodRunningAndReady(leaderPod) {
readyCount++

waitTime := getLastTransitionTime(string(leaderworkerset.LeaderWorkerSetProgressing), lws)
metrics.ReplicaReadyStatus(sts.Name, time.Since(waitTime.Time))
if sts.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash && leaderPod.Labels[leaderworkerset.TemplateRevisionHashKey] == templateHash {
updatedCount++
}
Expand Down Expand Up @@ -569,3 +572,12 @@ func templateUpdated(sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerS
// replicasUpdated reports whether the replica count set on the StatefulSet
// differs from the replica count requested in the LeaderWorkerSet spec.
// Both Replicas pointers are dereferenced without a nil check.
func replicasUpdated(sts *appsv1.StatefulSet, lws *leaderworkerset.LeaderWorkerSet) bool {
	current := *sts.Spec.Replicas
	desired := *lws.Spec.Replicas
	return current != desired
}

// getLastTransitionTime returns the LastTransitionTime of the first condition
// in lws.Status.Conditions whose Type equals conditionType. When no condition
// of that type is present it returns the current time (metav1.Now()).
func getLastTransitionTime(conditionType string, lws *leaderworkerset.LeaderWorkerSet) metav1.Time {
	conditions := lws.Status.Conditions
	for i := range conditions {
		if conditions[i].Type != conditionType {
			continue
		}
		return conditions[i].LastTransitionTime
	}
	// No matching condition recorded: fall back to "now".
	return metav1.Now()
}
13 changes: 13 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ var (
Help: "number of times a group has been recreated",
}, []string{"leadername"},
)

// replicaReadyStatusDuration is a histogram in the "lws" subsystem with one
// series per "leadername" label value. ReplicaReadyStatus feeds it with
// durations converted to seconds.
// No Buckets are configured here, so the bucket layout is whatever the
// Prometheus client library applies by default.
replicaReadyStatusDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: "lws",
Name: "replica_ready_status_duration",
Help: "latency for each replica to be scheduled and become ready",
}, []string{"leadername"},
)
)

func RollingUpdate(hash string, duration time.Duration) {
Expand All @@ -49,9 +57,14 @@ func RecreatingGroup(leaderName string) {
recreateGroupTimes.WithLabelValues(leaderName).Inc()
}

// ReplicaReadyStatus records one observation on the
// replicaReadyStatusDuration histogram for the series labelled with
// leaderName. The duration is reported in seconds.
//
// The parameter is named duration (not time) so that it does not shadow the
// time package inside the function body, matching RollingUpdate's signature.
func ReplicaReadyStatus(leaderName string, duration time.Duration) {
	replicaReadyStatusDuration.WithLabelValues(leaderName).Observe(duration.Seconds())
}

// Register adds every metric collector declared in this package to
// metrics.Registry. Collectors are registered one at a time, in declaration
// order; MustRegister panics if a registration fails.
func Register() {
	metrics.Registry.MustRegister(rollingUpdateDuration)
	metrics.Registry.MustRegister(recreateGroupTimes)
	metrics.Registry.MustRegister(replicaReadyStatusDuration)
}
28 changes: 28 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package metrics

import (
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)

// TestRecreatingGroup verifies that RecreatingGroup keeps one counter series
// per leader name and increments that series once per call.
func TestRecreatingGroup(t *testing.T) {
	// Register on a private registry instead of the process-wide default one:
	// prometheus.MustRegister panics on a duplicate registration, which is
	// what happens when this test runs more than once in the same process
	// (for example with `go test -count=2`).
	prometheus.NewRegistry().MustRegister(recreateGroupTimes)
	// The counter vector is package-level state; drop any series left behind
	// by an earlier run so the expected values below stay exact.
	recreateGroupTimes.Reset()

	RecreatingGroup("lws-sample-0")
	RecreatingGroup("lws-sample-1")
	RecreatingGroup("lws-sample-0")

	if count := testutil.CollectAndCount(recreateGroupTimes); count != 2 {
		t.Errorf("Expecting %d metrics, got: %d", 2, count)
	}

	expectations := []struct {
		leader string
		want   int
	}{
		{leader: "lws-sample-0", want: 2},
		{leader: "lws-sample-1", want: 1},
	}
	for _, tc := range expectations {
		got := testutil.ToFloat64(recreateGroupTimes.WithLabelValues(tc.leader))
		if got != float64(tc.want) {
			t.Errorf("Expecting %s to have value %d, but got %f", tc.leader, tc.want, got)
		}
	}
}

0 comments on commit c901648

Please sign in to comment.