Skip to content

Commit d26909c

Browse files
committed
Api availability measurement initial commit
1 parent 6a745dd commit d26909c

File tree

2 files changed

+287
-0
lines changed

2 files changed

+287
-0
lines changed
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"k8s.io/perf-tests/clusterloader2/pkg/errors"
23+
"sync"
24+
"time"
25+
26+
goerrors "github.com/go-errors/errors"
27+
clientset "k8s.io/client-go/kubernetes"
28+
"k8s.io/klog"
29+
"k8s.io/perf-tests/clusterloader2/pkg/measurement"
30+
measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
31+
"k8s.io/perf-tests/clusterloader2/pkg/util"
32+
)
33+
34+
// apiAvailabilityName is the name under which this measurement is registered
// and the name given to the summaries it produces.
const apiAvailabilityName = "APIAvailability"
37+
38+
func init() {
39+
if err := measurement.Register(apiAvailabilityName, createAPIAvailabilityMeasurement); err != nil {
40+
klog.Fatalf("Cannot register %s: %v", apiAvailabilityName, err)
41+
}
42+
}
43+
44+
func createAPIAvailabilityMeasurement() measurement.Measurement {
45+
return &apiAvailabilityMeasurement{}
46+
}
47+
48+
type apiAvailabilityMeasurement struct {
49+
isRunning bool
50+
stopCh chan struct{}
51+
hosts []string
52+
summaries []measurement.Summary
53+
clusterLevelMetrics *apiAvailabilityMetrics
54+
hostLevelMetrics map[string]*apiAvailabilityMetrics
55+
wg sync.WaitGroup
56+
}
57+
58+
func (a *apiAvailabilityMeasurement) updateMasterAvailabilityMetrics(c clientset.Interface, config *measurement.Config, provider string) {
59+
for _, host := range a.hosts {
60+
// SSH and check the health of the host
61+
command := fmt.Sprintf("curl -s -k %slocalhost:%v/healthz", "https://", 443)
62+
sshResult, err := measurementutil.SSH(command, host+":22", provider)
63+
if err != nil || sshResult.Code != 0 {
64+
a.updateAvailabilityMetrics(false, a.hostLevelMetrics[host])
65+
} else {
66+
a.updateAvailabilityMetrics(true, a.hostLevelMetrics[host])
67+
}
68+
}
69+
}
70+
71+
func (a *apiAvailabilityMeasurement) updateClusterAvailabilityMetrics(c clientset.Interface) {
72+
// Check the availability of the cluster by issuing a REST call to /healthz end point
73+
_, err := c.CoreV1().RESTClient().Get().AbsPath("/healthz").DoRaw(context.TODO())
74+
if err != nil {
75+
a.updateAvailabilityMetrics(false, a.clusterLevelMetrics)
76+
} else {
77+
a.updateAvailabilityMetrics(true, a.clusterLevelMetrics)
78+
}
79+
}
80+
81+
func (a *apiAvailabilityMeasurement) start(config *measurement.Config, SSHToMasterSupported bool, probeDuration int) error {
82+
a.hosts = config.ClusterFramework.GetClusterConfig().MasterIPs
83+
if len(a.hosts) < 1 {
84+
return goerrors.Errorf("API Server measurement can't execute due to no MasterIPs")
85+
}
86+
87+
k8sClient := config.ClusterFramework.GetClientSets().GetClient()
88+
provider, err := util.GetStringOrDefault(config.Params, "provider", config.ClusterFramework.GetClusterConfig().Provider)
89+
if err != nil {
90+
return err
91+
}
92+
93+
a.isRunning = true
94+
a.stopCh = make(chan struct{})
95+
a.wg.Add(1)
96+
97+
go func() {
98+
defer a.wg.Done()
99+
for {
100+
select {
101+
case <-a.stopCh:
102+
return
103+
case <-time.After(time.Duration(probeDuration)):
104+
a.updateClusterAvailabilityMetrics(k8sClient)
105+
if SSHToMasterSupported {
106+
a.updateMasterAvailabilityMetrics(k8sClient, config, provider)
107+
}
108+
}
109+
}
110+
}()
111+
return nil
112+
}
113+
114+
// Execute starts the api-server healthz probe end point from start action and
115+
// collects availability metrics in gather.
116+
func (a *apiAvailabilityMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
117+
SSHToMasterSupported := config.ClusterFramework.GetClusterConfig().SSHToMasterSupported
118+
action, err := util.GetString(config.Params, "action")
119+
if err != nil {
120+
return nil, err
121+
}
122+
123+
probeFrequency, err := util.GetIntOrDefault(config.Params, "frequency", 1)
124+
if err != nil {
125+
return nil, err
126+
}
127+
128+
switch action {
129+
case "start":
130+
if a.isRunning {
131+
klog.Infof("%s: measurement already running", a)
132+
return nil, nil
133+
}
134+
return nil, a.start(config, SSHToMasterSupported, probeFrequency)
135+
case "gather":
136+
err := a.stop(SSHToMasterSupported, probeFrequency)
137+
if err != nil {
138+
return nil, err
139+
}
140+
return a.summaries, nil
141+
default:
142+
return nil, fmt.Errorf("unknown action %v", action)
143+
}
144+
}
145+
146+
func (a *apiAvailabilityMeasurement) createClusterAvailabilitySummary(probeFrequency int) error {
147+
a.clusterLevelMetrics.updateMaxConsecutiveFailuresIfNeeded()
148+
clusterAvailabilityMetricsOutput := a.clusterLevelMetrics.buildAPIAvailabilityMetricsOutput(probeFrequency, true, "")
149+
content, err := util.PrettyPrintJSON(clusterAvailabilityMetricsOutput)
150+
if err != nil {
151+
return err
152+
} else {
153+
a.summaries = append(a.summaries, measurement.CreateSummary(apiAvailabilityName, "json", content))
154+
return nil
155+
}
156+
}
157+
158+
func (a *apiAvailabilityMeasurement) createMasterAvailabilitySummary(probeFrequency int) error {
159+
createMasterAvailabilitySummaryErrors := errors.NewErrorList()
160+
for _, host := range a.hosts {
161+
a.hostLevelMetrics[host].updateMaxConsecutiveFailuresIfNeeded()
162+
hostAvailabilityMetricsOutput := a.hostLevelMetrics[host].buildAPIAvailabilityMetricsOutput(probeFrequency,false, host)
163+
content, err := util.PrettyPrintJSON(hostAvailabilityMetricsOutput)
164+
if err != nil {
165+
createMasterAvailabilitySummaryErrors.Append(err)
166+
} else{
167+
summary := measurement.CreateSummary(apiAvailabilityName, "json", content)
168+
a.summaries = append(a.summaries, summary)
169+
}
170+
}
171+
if createMasterAvailabilitySummaryErrors.IsEmpty() {
172+
return nil
173+
}
174+
return createMasterAvailabilitySummaryErrors
175+
}
176+
177+
func (a *apiAvailabilityMeasurement) stop(SSHToMasterSupported bool, probeFrequency int) error {
178+
if !a.isRunning {
179+
return nil
180+
}
181+
close(a.stopCh)
182+
a.wg.Wait()
183+
184+
errList := errors.NewErrorList()
185+
err := a.createClusterAvailabilitySummary(probeFrequency)
186+
if err != nil {
187+
errList.Append(errList, err)
188+
}
189+
if SSHToMasterSupported {
190+
err := a.createMasterAvailabilitySummary(probeFrequency)
191+
if err != nil {
192+
errList.Append(errList, err)
193+
}
194+
}
195+
if errList.IsEmpty() {
196+
return nil
197+
}
198+
return errList
199+
}
200+
201+
// Dispose cleans up after the measurement.
202+
func (a apiAvailabilityMeasurement) Dispose() {}
203+
204+
// String returns string representation of this measurement.
205+
func (a apiAvailabilityMeasurement) String() string {
206+
return apiAvailabilityName
207+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common
18+
19+
import (
20+
"time"
21+
)
22+
23+
// apiAvailabilityMetrics holds the raw probe counters for one probe target.
type apiAvailabilityMetrics struct {
	// numSuccesses is the number of probes that succeeded.
	numSuccesses int
	// numFailures is the number of probes that failed.
	numFailures int
	// maxConsecutiveFailedProbes is the longest failure streak recorded so far.
	maxConsecutiveFailedProbes int
	// consecutiveFailedProbes is the length of the current failure streak; it
	// is reset to zero by a successful probe.
	consecutiveFailedProbes int
}
29+
30+
// apiAvailabilityOutput is the JSON shape of one availability summary.
type apiAvailabilityOutput struct {
	// ClusterMetrics is true for the cluster-level summary and false for a
	// per-host summary.
	ClusterMetrics bool `json:"clusterMetrics"`
	// HostIP identifies the probed host; it is left empty for cluster-level
	// summaries.
	HostIP string `json:"hostIP"`
	// AvailabilityPercentage is the share of successful probes, in percent.
	AvailabilityPercentage float32 `json:"availabilityPercentage"`
	// LongestUnavailableDuration is derived from the longest failure streak.
	LongestUnavailableDuration time.Duration `json:"longestUnavailableDuration"`
}
36+
37+
// updateMaxConsecutiveFailuresIfNeeded checks if the recently concluded consecutive failed number of probes is
38+
// higher than the max consecutive failed number of probes so far
39+
// if yes, then Update max consecutive failed probes
40+
func (a *apiAvailabilityMetrics) updateMaxConsecutiveFailuresIfNeeded() {
41+
if a.consecutiveFailedProbes > a.maxConsecutiveFailedProbes {
42+
a.maxConsecutiveFailedProbes = a.consecutiveFailedProbes
43+
}
44+
}
45+
46+
func (a *apiAvailabilityMetrics) updateFailureMetrics() {
47+
a.numFailures++
48+
a.consecutiveFailedProbes++
49+
}
50+
51+
func (a *apiAvailabilityMetrics) updateSuccessMetrics() {
52+
a.numSuccesses++
53+
if a.consecutiveFailedProbes > 0 {
54+
a.updateMaxConsecutiveFailuresIfNeeded()
55+
a.consecutiveFailedProbes = 0
56+
}
57+
}
58+
59+
func (a *apiAvailabilityMeasurement) updateAvailabilityMetrics(apiServerAvailable bool, metrics *apiAvailabilityMetrics) {
60+
if apiServerAvailable {
61+
metrics.updateSuccessMetrics()
62+
} else {
63+
metrics.updateFailureMetrics()
64+
}
65+
}
66+
67+
func (metrics *apiAvailabilityMetrics) buildAPIAvailabilityMetricsOutput(probeFrequency int, clusterMetrics bool, hostIP string) *apiAvailabilityOutput {
68+
// Gather availability metrics
69+
availabilityPercentage := (float32(metrics.numSuccesses) / float32(metrics.numSuccesses+ metrics.numFailures)) * 100
70+
longestUnavailableDuration := time.Duration(metrics.maxConsecutiveFailedProbes * probeFrequency)
71+
availabilityMetrics := &apiAvailabilityOutput{}
72+
availabilityMetrics.AvailabilityPercentage = availabilityPercentage
73+
availabilityMetrics.LongestUnavailableDuration = longestUnavailableDuration
74+
availabilityMetrics.ClusterMetrics = clusterMetrics
75+
if !clusterMetrics {
76+
availabilityMetrics.HostIP = hostIP
77+
}
78+
return availabilityMetrics
79+
}
80+

0 commit comments

Comments
 (0)