Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/operation/operation.md
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,10 @@ The following timer metrics are exposed per used bundle-name:
- `skipper.opaAuthorizeRequest.custom.eval_time.<bundle-name>`
- `skipper.opaServeResponse.custom.eval_time.<bundle-name>`

Open Policy Agent [native Prometheus metrics](https://www.openpolicyagent.org/docs/monitoring#status-metrics) are passed through if the metrics backend is set to Prometheus (via `--metrics-flavour`).

The OPA native metrics are prefixed with `skipper_openpolicyagent_`, e.g. `skipper_openpolicyagent_plugin_status_gauge` will be exposed via Skipper's `/metrics` endpoint. Two extra labels are added to all metrics: `opa_instance_name` (set to the bundle name parameter of the filters) and `opa_instance_id` (a random ID that identifies the virtual OPA instance).

### RouteSRV metrics

RouteSRV metrics expose the following metrics in Prometheus format:
Expand Down
29 changes: 29 additions & 0 deletions filters/openpolicyagent/internal/confighook.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/open-policy-agent/opa/v1/plugins"
"github.com/open-policy-agent/opa/v1/plugins/bundle"
"github.com/open-policy-agent/opa/v1/plugins/discovery"
"github.com/open-policy-agent/opa/v1/plugins/status"
)

// ManualOverride is override the plugin trigger to manual trigger mode, allowing the openpolicyagent filter
Expand Down Expand Up @@ -82,3 +83,31 @@ func bundlePluginConfigOverride(config *config.Config) (*config.Config, error) {
}
return config, nil
}

// PrometheusOverride is a config hook that enables Prometheus metrics for the status plugin.
type PrometheusOverride struct {
}

func (p *PrometheusOverride) OnConfig(ctx context.Context, config *config.Config) (*config.Config, error) {
var (
statusConfig status.Config
message []byte
)

if config.Status != nil {
err := json.Unmarshal(config.Status, &statusConfig)
if err != nil {
return nil, err
}

statusConfig.Prometheus = true

message, err = json.Marshal(statusConfig)
if err != nil {
return nil, err
}
config.Status = message
}

return config, nil
}
40 changes: 35 additions & 5 deletions filters/openpolicyagent/openpolicyagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
iCache "github.com/open-policy-agent/opa/v1/topdown/cache"
opatracing "github.com/open-policy-agent/opa/v1/tracing"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/zalando/skipper/filters/openpolicyagent/internal"
"golang.org/x/sync/semaphore"
"google.golang.org/protobuf/encoding/protojson"
Expand Down Expand Up @@ -109,7 +110,8 @@ type OpenPolicyAgentRegistry struct {
maxRequestBodyBytes int64
bodyReadBufferSize int64

tracer opentracing.Tracer
tracer opentracing.Tracer
prometheusRegisterer prometheus.Registerer

enableCustomControlLoop bool
controlLoopInterval time.Duration
Expand Down Expand Up @@ -221,6 +223,13 @@ func WithControlLoopMaxJitter(maxJitter time.Duration) func(*OpenPolicyAgentRegi
}
}

func WithPrometheusRegisterer(registerer prometheus.Registerer) func(*OpenPolicyAgentRegistry) error {
return func(cfg *OpenPolicyAgentRegistry) error {
cfg.prometheusRegisterer = registerer
return nil
}
}

func (registry *OpenPolicyAgentRegistry) initializeCache() error {
// This line interpolates the config template with a dummy bundle name to make sure the config is parseable.
// It is safe in production because the result is not used for anything except caching configuration.
Expand Down Expand Up @@ -678,18 +687,39 @@ func (registry *OpenPolicyAgentRegistry) new(store storage.Store, bundleName str
var logger logging.Logger = &QuietLogger{target: logging.Get()}
logger = logger.WithFields(map[string]interface{}{"bundle-name": bundleName})

configHooks := hooks.New()
var configHooks []hooks.Hook
if registry.enableCustomControlLoop {
configHooks = hooks.New(&internal.ManualOverride{})
configHooks = append(configHooks, &internal.ManualOverride{})
}

manager, err := plugins.New(configBytes, id, store, configLabelsInfo(*opaConfig), plugins.Logger(logger), registry.withTracingOptions(bundleName), plugins.WithHooks(configHooks))
var registerer prometheus.Registerer
if registry.prometheusRegisterer != nil {
registerer = prometheus.WrapRegistererWith(
prometheus.Labels{
"opa_instance_name": bundleName,
"opa_instance_id": id,
},
registry.prometheusRegisterer,
)

configHooks = append(configHooks, &internal.PrometheusOverride{})
}

manager, err := plugins.New(configBytes,
id,
store,
configLabelsInfo(*opaConfig),
plugins.Logger(logger),
registry.withTracingOptions(bundleName),
plugins.WithHooks(hooks.New(configHooks...)),
plugins.WithPrometheusRegister(registerer))

if err != nil {
return nil, err
}

discoveryPlugin, err := discovery.New(manager, discovery.Factories(map[string]plugins.Factory{envoy.PluginName: envoy.Factory{}}), discovery.Hooks(configHooks))
discoveryPlugin, err := discovery.New(manager, discovery.Factories(map[string]plugins.Factory{envoy.PluginName: envoy.Factory{}}), discovery.Hooks(hooks.New(configHooks...)))

if err != nil {
return nil, err
}
Expand Down
57 changes: 56 additions & 1 deletion filters/openpolicyagent/openpolicyagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ import (
"github.com/zalando/skipper/routing"
"github.com/zalando/skipper/tracing/tracingtest"
"google.golang.org/protobuf/encoding/protojson"

"github.com/prometheus/client_golang/prometheus"
)

type MockOpenPolicyAgentFilter struct {
Expand Down Expand Up @@ -155,6 +157,7 @@ func mockControlPlaneWithDiscoveryBundle(discoveryBundle string) (*opasdktest.Se

type controlPlaneConfig struct {
enableJwtCaching bool
enableStatus bool
}
type ControlPlaneOption func(*controlPlaneConfig)

Expand All @@ -164,6 +167,12 @@ func WithJwtCaching(enabled bool) ControlPlaneOption {
}
}

func WithStatusPluginEnabled(enabled bool) ControlPlaneOption {
return func(cfg *controlPlaneConfig) {
cfg.enableStatus = enabled
}
}

func mockControlPlaneWithResourceBundle(opts ...ControlPlaneOption) (*opasdktest.Server, []byte) {
opaControlPlane := opasdktest.MustNewServer(
opasdktest.MockBundle("/bundles/test", map[string]string{
Expand Down Expand Up @@ -215,6 +224,15 @@ func mockControlPlaneWithResourceBundle(opts ...ControlPlaneOption) (*opasdktest
`
}

statusConfig := ""
if cfg.enableStatus {
statusConfig = `
"status": {
"console": true
},
`
}

config := []byte(fmt.Sprintf(`{
"services": {
"test": {
Expand All @@ -227,14 +245,15 @@ func mockControlPlaneWithResourceBundle(opts ...ControlPlaneOption) (*opasdktest
}
},
%s
%s
"plugins": {
"envoy_ext_authz_grpc": {
"path": "envoy/authz/allow",
"dry-run": false,
"skip-request-body-parse": false
}
}
}`, opaControlPlane.URL(), jwtCacheConfig))
}`, opaControlPlane.URL(), jwtCacheConfig, statusConfig))

return opaControlPlane, config
}
Expand Down Expand Up @@ -1078,6 +1097,42 @@ func TestBodyExtractionUnknownBody(t *testing.T) {
f2()
}

func TestPrometheusPluginStatusGaugeRegistered(t *testing.T) {
_, config := mockControlPlaneWithResourceBundle(WithStatusPluginEnabled(true))

reg := prometheus.NewRegistry()
registry, err := NewOpenPolicyAgentRegistry(
WithReuseDuration(1*time.Second),
WithCleanInterval(1*time.Second),
WithOpenPolicyAgentInstanceConfig(WithConfigTemplate(config)),
WithPrometheusRegisterer(reg),
)
assert.NoError(t, err)

inst, err := registry.GetOrStartInstance("test")
assert.NoError(t, err)

// Simulate an HTTP request evaluation
ctx := context.Background()
_, err = inst.Eval(ctx, &authv3.CheckRequest{
Attributes: &authv3.AttributeContext{},
})
assert.NoError(t, err)

// Gather metrics and check for plugin_status_gauge
metricsFamilies, err := reg.Gather()
assert.NoError(t, err)

found := false
for _, mf := range metricsFamilies {
if mf.GetName() == "plugin_status_gauge" && len(mf.Metric) > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could also verify the labels for the metrics as this is also part of the interface to the outside

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch

found = true
break
}
}
assert.True(t, found, "plugin_status_gauge should be registered and have data")
}

type opaInstanceStartupTestCase struct {
enableCustomControlLoop bool
expectedError string
Expand Down
7 changes: 7 additions & 0 deletions metrics/all_kind.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package metrics
import (
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
)

type All struct {
Expand Down Expand Up @@ -146,6 +148,11 @@ func (a *All) Close() {
a.prometheus.Close()
}

// Implements the PrometheusMetrics interface
func (a *All) ScopedPrometheusRegisterer(subsystem string) prometheus.Registerer {
return a.prometheus.ScopedPrometheusRegisterer(subsystem)
}

func (a *All) RegisterHandler(path string, handler *http.ServeMux) {
a.prometheusHandler = a.prometheus.getHandler()
a.codaHaleHandler = a.codaHale.getHandler(path)
Expand Down
8 changes: 8 additions & 0 deletions metrics/all_kind_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package metrics

import "testing"

// Compile-time check that All implements PrometheusMetrics
func TestAllImplementsPrometheusMetrics(t *testing.T) {
var _ PrometheusMetrics = (*All)(nil)
}
8 changes: 8 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ type Metrics interface {
Close()
}

// PrometheusMetrics is an optional interface that a Metrics flavour can implement.
// This can only credibly be implemented with Prometheus as a backend and can also be removed if Prometheus
// is the only supported backend and Codahale support is dropped
type PrometheusMetrics interface {
Metrics
ScopedPrometheusRegisterer(subsystem string) prometheus.Registerer
}

// Options for initializing metrics collection.
type Options struct {
// the metrics exposing format.
Expand Down
23 changes: 15 additions & 8 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,30 +78,32 @@ type Prometheus struct {
invalidRoutesMu sync.RWMutex
invalidRouteLabels map[string][]prometheus.Labels // routeId -> []labels

opts Options
registry *prometheus.Registry
handler http.Handler
opts Options
registry *prometheus.Registry
handler http.Handler
namespace string
}

// NewPrometheus returns a new Prometheus metric backend.
func NewPrometheus(opts Options) *Prometheus {
opts = applyCompatibilityDefaults(opts)

namespace := promNamespace
if opts.Prefix != "" {
namespace = strings.TrimSuffix(opts.Prefix, ".")
}

p := &Prometheus{
registry: opts.PrometheusRegistry,
opts: opts,
invalidRouteLabels: make(map[string][]prometheus.Labels),
namespace: namespace,
}

if p.registry == nil {
p.registry = prometheus.NewRegistry()
}

namespace := promNamespace
if opts.Prefix != "" {
namespace = strings.TrimSuffix(opts.Prefix, ".")
}

p.routeLookupM = register(p, prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: promRouteSubsystem,
Expand Down Expand Up @@ -555,6 +557,11 @@ func (p *Prometheus) DeleteInvalidRoute(routeId string) {

func (p *Prometheus) Close() {}

// Implements the PrometheusMetrics interface
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Godoc should start by the func name

func (p *Prometheus) ScopedPrometheusRegisterer(subsystem string) prometheus.Registerer {
return prometheus.WrapRegistererWithPrefix(p.namespace+"_"+subsystem+"_", p.registry)
}

// withStartLabelGatherer adds a "start" label to all counters with
// the value of counter creation timestamp as unix nanoseconds.
type withStartLabelGatherer struct {
Expand Down
37 changes: 37 additions & 0 deletions metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zalando/skipper/metrics"
Expand Down Expand Up @@ -1302,3 +1303,39 @@ func TestPrometheusMetricsStartTimestamp(t *testing.T) {
checkMetric(`skipper_serve_host_count{code="201",host="bar_test",method="POST",start="(\d+)"} 2`)
checkMetric(`skipper_route_error_total{start="(\d+)"} 3`)
}

// Compile-time check that Prometheus implements PrometheusMetrics
func TestPrometheusImplementsPrometheusMetrics(t *testing.T) {
var _ metrics.PrometheusMetrics = (*metrics.Prometheus)(nil)
}

func TestScopedPrometheusRegistererPrefix(t *testing.T) {
reg := prometheus.NewRegistry()
pm := metrics.NewPrometheus(metrics.Options{
PrometheusRegistry: reg,
Prefix: "customprefix",
})

registerer := pm.ScopedPrometheusRegisterer("mysubsystem")
counter := prometheus.NewCounter(prometheus.CounterOpts{
Name: "mycounter",
Help: "test counter",
})
registerer.MustRegister(counter)

metricsFamilies, err := reg.Gather()
if err != nil {
t.Fatalf("failed to gather metrics: %v", err)
}

found := false
for _, mf := range metricsFamilies {
if mf.GetName() == "customprefix_mysubsystem_mycounter" {
found = true
break
}
}
if !found {
t.Errorf("expected metric with name 'customprefix_mysubsystem_mycounter' to be registered")
}
}
Loading