Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/activator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ func main() {
// Open a WebSocket connection to the autoscaler.
autoscalerEndpoint := "ws://" + pkgnet.GetServiceHostname("autoscaler", system.Namespace()) + autoscalerPort
logger.Info("Connecting to Autoscaler at ", autoscalerEndpoint)
statSink := websocket.NewDurableSendingConnection(autoscalerEndpoint, logger)
statSink := websocket.NewDurableSendingConnection(autoscalerEndpoint, logger,
activator.AutoscalerConnectionOptions(logger, mp)...)
defer statSink.Shutdown()
go activator.ReportStats(logger, statSink, statCh)

Expand Down
70 changes: 70 additions & 0 deletions pkg/activator/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
Copyright 2024 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package activator

import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

var (
	// scopeName is the instrumentation scope name passed to the meter
	// provider when creating the activator's meter.
	scopeName = "knative.dev/serving/pkg/activator"

	// peerAttrKey is the attribute key for identifying the connection peer.
	peerAttrKey = attribute.Key("peer")

	// PeerAutoscaler is the attribute value for autoscaler connections.
	PeerAutoscaler = peerAttrKey.String("autoscaler")
)

// statReporterMetrics groups the metric instruments used to report the state
// of the activator's connections to its peers.
type statReporterMetrics struct {
	// reachable is a gauge registered as "kn.activator.reachable"
	// (1 = reachable, 0 = not reachable).
	reachable metric.Int64Gauge

	// connectionErrors is a counter registered as
	// "kn.activator.connection_errors".
	connectionErrors metric.Int64Counter
}

// newStatReporterMetrics creates the reachability gauge and the connection
// error counter on a meter obtained from mp. When mp is nil the globally
// registered OpenTelemetry meter provider is used instead.
//
// It panics if either instrument cannot be created.
func newStatReporterMetrics(mp metric.MeterProvider) *statReporterMetrics {
	// Fall back to the global provider when the caller did not supply one.
	if mp == nil {
		mp = otel.GetMeterProvider()
	}
	meter := mp.Meter(scopeName)

	reachable, err := meter.Int64Gauge(
		"kn.activator.reachable",
		metric.WithDescription("Whether a peer is reachable from the activator (1 = reachable, 0 = not reachable)"),
		metric.WithUnit("{reachable}"),
	)
	if err != nil {
		panic(err)
	}

	connectionErrors, err := meter.Int64Counter(
		"kn.activator.connection_errors",
		metric.WithDescription("Number of connection errors from the activator"),
		metric.WithUnit("{error}"),
	)
	if err != nil {
		panic(err)
	}

	return &statReporterMetrics{
		reachable:        reachable,
		connectionErrors: connectionErrors,
	}
}
34 changes: 29 additions & 5 deletions pkg/activator/stat_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@ limitations under the License.
package activator

import (
"context"

"github.com/gorilla/websocket"
"go.opentelemetry.io/otel/metric"
"go.uber.org/zap"
"knative.dev/serving/pkg/autoscaler/metrics"
pkgwebsocket "knative.dev/pkg/websocket"
asmetrics "knative.dev/serving/pkg/autoscaler/metrics"
)

// RawSender sends raw byte array messages with a message type
Expand All @@ -28,21 +32,41 @@ type RawSender interface {
SendRaw(msgType int, msg []byte) error
}

// AutoscalerConnectionOptions builds the websocket connection options that
// keep the autoscaler reachability metrics up to date. The options register
// connect and disconnect callbacks, so the metrics change as soon as the
// connection state changes rather than being polled.
//
// Instruments are created from mp via newStatReporterMetrics. Every
// measurement is tagged with the PeerAutoscaler attribute.
func AutoscalerConnectionOptions(logger *zap.SugaredLogger, mp metric.MeterProvider) []pkgwebsocket.ConnectionOption {
	instruments := newStatReporterMetrics(mp)
	peer := metric.WithAttributes(PeerAutoscaler)

	// On connect: mark the autoscaler as reachable.
	onConnect := func() {
		logger.Info("Autoscaler connection established")
		instruments.reachable.Record(context.Background(), 1, peer)
	}

	// On disconnect: mark the autoscaler as unreachable and count the error.
	onDisconnect := func(err error) {
		logger.Errorw("Autoscaler connection lost", zap.Error(err))
		ctx := context.Background()
		instruments.reachable.Record(ctx, 0, peer)
		instruments.connectionErrors.Add(ctx, 1, peer)
	}

	return []pkgwebsocket.ConnectionOption{
		pkgwebsocket.WithOnConnect(onConnect),
		pkgwebsocket.WithOnDisconnect(onDisconnect),
	}
}

// ReportStats sends any messages received on the source channel to the sink.
// The messages are sent on a goroutine to avoid blocking, which means that
// messages may arrive out of order.
func ReportStats(logger *zap.SugaredLogger, sink RawSender, source <-chan []metrics.StatMessage) {
func ReportStats(logger *zap.SugaredLogger, sink RawSender, source <-chan []asmetrics.StatMessage) {
for sms := range source {
go func(sms []metrics.StatMessage) {
wsms := metrics.ToWireStatMessages(sms)
go func(sms []asmetrics.StatMessage) {
wsms := asmetrics.ToWireStatMessages(sms)
b, err := wsms.Marshal()
if err != nil {
logger.Errorw("Error while marshaling stats", zap.Error(err))
return
}

if err := sink.SendRaw(websocket.BinaryMessage, b); err != nil {
logger.Errorw("Error while sending stats", zap.Error(err))
logger.Errorw("Autoscaler is not reachable from activator. Stats were not sent.",
zap.Error(err),
zap.Int("stat_message_count", len(sms)))
}
}(sms)
}
Expand Down
39 changes: 39 additions & 0 deletions pkg/activator/stat_reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package activator

import (
"errors"
"testing"
"time"

Expand Down Expand Up @@ -95,3 +96,41 @@ type sendRawFunc func(msgType int, msg []byte) error
// SendRaw calls the underlying function with the given message type and
// payload and returns its error unchanged.
func (f sendRawFunc) SendRaw(msgType int, msg []byte) error {
	return f(msgType, msg)
}

// TestReportStatsSendFailure feeds one stat message to ReportStats with a sink
// that always fails, and checks that the sink is invoked within the timeout.
func TestReportStatsSendFailure(t *testing.T) {
	var (
		logger    = logtesting.TestLogger(t)
		statCh    = make(chan []metrics.StatMessage)
		sendErr   = errors.New("connection refused")
		attempted = make(chan struct{})
	)
	defer close(statCh)

	// The sink signals that it was reached and then reports a send failure.
	var failingSink sendRawFunc = func(int, []byte) error {
		close(attempted)
		return sendErr
	}
	go ReportStats(logger, failingSink, statCh)

	// Push a single stat message through the reporter.
	statCh <- []metrics.StatMessage{{
		Key: types.NamespacedName{Name: "test-revision"},
	}}

	// Block until the sink has been called, or give up after the timeout.
	timeout := time.After(2 * time.Second)
	select {
	case <-attempted:
		// The failing send path was exercised.
	case <-timeout:
		t.Fatal("SendRaw was not called within timeout")
	}
}

func TestAutoscalerConnectionOptions(t *testing.T) {
logger := logtesting.TestLogger(t)

opts := AutoscalerConnectionOptions(logger, nil)

if len(opts) != 2 {
t.Errorf("Expected 2 connection options, got %d", len(opts))
}
}
Loading