Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions api/services/v1alpha1/monitoring_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ type Traces struct {
// +kubebuilder:validation:Pattern="^(0(\\.[0-9]+)?|1(\\.0+)?)$"
SampleRatio string `json:"sampleRatio,omitempty"`
// TLS configuration for Tempo gRPC connections
// +optional
TLS *TracesTLS `json:"tls,omitempty"`
// Exporters defines custom trace exporters for sending traces to external observability tools.
// Each key represents the exporter name, and the value contains the exporter configuration.
Expand All @@ -95,10 +96,10 @@ type Traces struct {
Exporters map[string]runtime.RawExtension `json:"exporters,omitempty"`
}

// TracesTLS defines TLS configuration for traces collection
// TracesTLS defines TLS configuration for trace ingestion and query APIs
type TracesTLS struct {
// Enabled enables TLS for Tempo gRPC connections
// +kubebuilder:default=true
// Enabled enables TLS for Tempo OTLP ingestion (gRPC/HTTP) and query APIs (HTTP)
// TLS is disabled by default to maintain backward compatibility
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment this flag sets both ingestion and querying to false, is this the expected result or would it be better to keep tls for ingestion and perses seperate?
There's currently no e2e test confirming that non-TLS trace ingestion actually works. The PR should add one to validate the tls.insecure: true path functions correctly

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

other then that lgtm

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review @den-rgb, I have added the two tests in ValidateOpenTelemetryCollectorConfigurations that validate both non-TLS (default) and TLS-enabled trace ingestion. Regarding the unified TLS flag, I kept ingestion and querying together for simplicity and consistency.

Enabled bool `json:"enabled,omitempty"`
// CertificateSecret specifies the name of the secret containing TLS certificates
// If not specified, OpenShift service serving certificates will be used
Expand All @@ -108,8 +109,18 @@ type TracesTLS struct {
CAConfigMap string `json:"caConfigMap,omitempty"`
}

// TracesStorage defines the storage configuration for tracing.
// +kubebuilder:validation:XValidation:rule="self.backend != 'pv' ? has(self.secret) : true", message="When backend is s3 or gcs, the 'secret' field must be specified and non-empty"
// Storage backend type constants
const (
// StorageBackendPV represents persistent volume storage backend
StorageBackendPV = "pv"
// StorageBackendS3 represents S3-compatible storage backend
StorageBackendS3 = "s3"
// StorageBackendGCS represents Google Cloud Storage backend
StorageBackendGCS = "gcs"
)

// TracesStorage defines the storage configuration for tracing
// +kubebuilder:validation:XValidation:rule="self.backend != 'pv' ? (has(self.secret) && self.secret != '') : true", message="When backend is s3 or gcs, the 'secret' field must be specified and non-empty"
// +kubebuilder:validation:XValidation:rule="self.backend != 'pv' ? !has(self.size) : true", message="Size is supported when backend is pv only"
type TracesStorage struct {
// Backend defines the storage backend type.
Expand Down
6 changes: 3 additions & 3 deletions docs/api-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -3039,7 +3039,7 @@ _Appears in:_



TracesStorage defines the storage configuration for tracing.
TracesStorage defines the storage configuration for tracing



Expand All @@ -3058,7 +3058,7 @@ _Appears in:_



TracesTLS defines TLS configuration for traces collection
TracesTLS defines TLS configuration for trace ingestion and query APIs



Expand All @@ -3067,7 +3067,7 @@ _Appears in:_

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `enabled` _boolean_ | Enabled enables TLS for Tempo gRPC connections | true | |
| `enabled` _boolean_ | Enabled enables TLS for Tempo OTLP ingestion (gRPC/HTTP) and query APIs (HTTP)<br />TLS is disabled by default to maintain backward compatibility | | |
| `certificateSecret` _string_ | CertificateSecret specifies the name of the secret containing TLS certificates<br />If not specified, OpenShift service serving certificates will be used | | |
| `caConfigMap` _string_ | CAConfigMap specifies the name of the ConfigMap containing the CA certificate<br />Required for mutual TLS authentication | | |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const (
PersesTempoDashboardTemplate = "resources/perses-tempo-dashboard.tmpl.yaml"
PersesDatasourcePrometheusTemplate = "resources/perses-datasource-prometheus.tmpl.yaml"
PrometheusClusterProxyTemplate = "resources/data-science-prometheus-cluster-proxy.tmpl.yaml"
TempoServiceCAConfigMapTemplate = "resources/tempo-service-ca-configmap.tmpl.yaml"

// Resource names.
PersesTempoDatasourceName = "tempo-datasource"
Expand Down Expand Up @@ -558,6 +559,10 @@ func deployPersesTempoIntegration(ctx context.Context, rr *odhtypes.Reconciliati
FS: resourcesFS,
Path: PersesTempoDatasourceTemplate,
},
{
FS: resourcesFS,
Path: TempoServiceCAConfigMapTemplate,
},
}

// Only deploy dashboard template if its CRD exists
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,17 +215,38 @@ func addTracesTemplateData(templateData map[string]any, traces *serviceApi.Trace
// Add retention for all backends (both TempoMonolithic and TempoStack)
templateData["TracesRetention"] = traces.Storage.Retention.Duration.String()

// Determine TLS enabled state for query endpoints (reuses existing TLS configuration)
tlsEnabled := determineTLSEnabled(traces)
templateData["TempoTLSEnabled"] = tlsEnabled

// Set TLS certificate configuration
if tlsEnabled {
// traces.TLS is guaranteed non-nil here since determineTLSEnabled returns false when TLS is nil
templateData["TempoCertificateSecret"] = traces.TLS.CertificateSecret
templateData["TempoCAConfigMap"] = traces.TLS.CAConfigMap
} else {
// Set empty values to avoid template missing key errors
templateData["TempoCertificateSecret"] = ""
templateData["TempoCAConfigMap"] = ""
}

// Use HTTPS for query endpoints when TLS is enabled (defaults to disabled)
protocol := "http"
if tlsEnabled {
protocol = "https"
}

// Add tempo-related data from traces.Storage fields (Storage is a struct, not a pointer)
switch traces.Storage.Backend {
case "pv":
case serviceApi.StorageBackendPV:
templateData["TempoEndpoint"] = fmt.Sprintf("tempo-data-science-tempomonolithic.%s.svc.cluster.local:4317", namespace)
// Perses datasource needs HTTP query endpoint (port 3200)
templateData["TempoQueryEndpoint"] = fmt.Sprintf("http://tempo-data-science-tempomonolithic.%s.svc.cluster.local:3200", namespace)
// Perses datasource query endpoint (port 3200) - uses HTTPS when TLS is enabled
templateData["TempoQueryEndpoint"] = fmt.Sprintf("%s://tempo-data-science-tempomonolithic.%s.svc.cluster.local:3200", protocol, namespace)
templateData["Size"] = traces.Storage.Size
case "s3", "gcs":
case serviceApi.StorageBackendS3, serviceApi.StorageBackendGCS:
templateData["TempoEndpoint"] = fmt.Sprintf("tempo-data-science-tempostack-gateway.%s.svc.cluster.local:4317", namespace)
// Perses datasource needs HTTP query endpoint via gateway (port 8080)
templateData["TempoQueryEndpoint"] = fmt.Sprintf("http://tempo-data-science-tempostack-gateway.%s.svc.cluster.local:8080", namespace)
// Perses datasource query endpoint via gateway (port 8080) - uses HTTPS when TLS is enabled
templateData["TempoQueryEndpoint"] = fmt.Sprintf("%s://tempo-data-science-tempostack-gateway.%s.svc.cluster.local:8080", protocol, namespace)
templateData["Secret"] = traces.Storage.Secret
}

Expand Down Expand Up @@ -328,7 +349,6 @@ func getTemplateData(ctx context.Context, rr *odhtypes.ReconciliationRequest) (m

// Add traces-related data if traces are configured
if traces := monitoring.Spec.Traces; traces != nil {
addTracesData(traces, monitoring.Spec.Namespace, templateData)
if err := addTracesTemplateData(templateData, traces, monitoring.Spec.Namespace); err != nil {
return nil, err
}
Expand Down Expand Up @@ -538,48 +558,15 @@ func addExportersData(metrics *serviceApi.Metrics, templateData map[string]any)
return nil
}

// addTracesData adds traces configuration data to the template data map.
func addTracesData(traces *serviceApi.Traces, namespace string, templateData map[string]any) {
templateData["OtlpEndpoint"] = fmt.Sprintf("http://data-science-collector.%s.svc.cluster.local:4317", namespace)
templateData["SampleRatio"] = traces.SampleRatio
templateData["Backend"] = traces.Storage.Backend // backend has default "pv" set in API

tlsEnabled := determineTLSEnabled(traces)
templateData["TempoTLSEnabled"] = tlsEnabled

if tlsEnabled && traces.TLS != nil {
templateData["TempoCertificateSecret"] = traces.TLS.CertificateSecret
templateData["TempoCAConfigMap"] = traces.TLS.CAConfigMap
} else {
// Set empty values to avoid template missing key errors
templateData["TempoCertificateSecret"] = ""
templateData["TempoCAConfigMap"] = ""
}

templateData["TracesRetention"] = traces.Storage.Retention.Duration.String()

setTempoEndpointAndStorageData(traces, namespace, templateData)
}

// determineTLSEnabled determines if TLS should be enabled for traces.
// TLS must be explicitly enabled via traces.TLS.Enabled field.
// Default is false to avoid Tempo operator certificate provisioning issues.
func determineTLSEnabled(traces *serviceApi.Traces) bool {
if traces.TLS != nil {
return traces.TLS.Enabled
}
return traces.Storage.Backend == "pv"
}

// setTempoEndpointAndStorageData sets the tempo endpoint and storage-specific data.
func setTempoEndpointAndStorageData(traces *serviceApi.Traces, namespace string, templateData map[string]any) {
switch traces.Storage.Backend {
case "pv":
templateData["TempoEndpoint"] = fmt.Sprintf("tempo-data-science-tempomonolithic.%s.svc.cluster.local:4317", namespace)
templateData["Size"] = traces.Storage.Size
case "s3", "gcs":
// Always use gateway endpoint for S3/GCS backends (required for OpenShift mode)
templateData["TempoEndpoint"] = fmt.Sprintf("tempo-data-science-tempostack-gateway.%s.svc.cluster.local:4317", namespace)
templateData["Secret"] = traces.Storage.Secret
}
// Default to false - user must explicitly enable TLS
return false
}

// getResourceValueOrDefault returns the resource value or a default if empty or zero.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,186 @@ func TestMonitoringStackThanosQuerierIntegration(t *testing.T) {
}
}

func TestDetermineTLSEnabled(t *testing.T) {
tests := []struct {
name string
traces *serviceApi.Traces
expected bool
}{
{
name: "TLS explicitly enabled",
traces: &serviceApi.Traces{
TLS: &serviceApi.TracesTLS{
Enabled: true,
},
Storage: serviceApi.TracesStorage{
Backend: "pv",
},
},
expected: true,
},
{
name: "TLS explicitly disabled",
traces: &serviceApi.Traces{
TLS: &serviceApi.TracesTLS{
Enabled: false,
},
Storage: serviceApi.TracesStorage{
Backend: "pv",
},
},
expected: false,
},
{
name: "TLS nil - PV backend defaults to false",
traces: &serviceApi.Traces{
Storage: serviceApi.TracesStorage{
Backend: "pv",
},
},
expected: false,
},
{
name: "TLS nil - S3 backend defaults to false",
traces: &serviceApi.Traces{
Storage: serviceApi.TracesStorage{
Backend: "s3",
},
},
expected: false,
},
{
name: "TLS nil - GCS backend defaults to false",
traces: &serviceApi.Traces{
Storage: serviceApi.TracesStorage{
Backend: "gcs",
},
},
expected: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := determineTLSEnabled(tt.traces)
if result != tt.expected {
t.Errorf("determineTLSEnabled() = %v, expected %v", result, tt.expected)
}
})
}
}

func TestAddTracesTemplateData_TLS(t *testing.T) {
tests := []struct {
name string
traces *serviceApi.Traces
namespace string
expectedTLSEnabled bool
expectedHTTPProtocol string
expectedPVEndpoint string
expectedS3Endpoint string
}{
{
name: "PV backend with TLS disabled (default)",
traces: &serviceApi.Traces{
SampleRatio: "0.1",
Storage: serviceApi.TracesStorage{
Backend: "pv",
Size: "5Gi",
Retention: metav1.Duration{Duration: 90 * 24 * 60 * 60 * 1000000000}, // 90 days in nanoseconds
},
},
namespace: "test-namespace",
expectedTLSEnabled: false,
expectedHTTPProtocol: "http",
expectedPVEndpoint: "http://tempo-data-science-tempomonolithic.test-namespace.svc.cluster.local:3200",
},
{
name: "PV backend with TLS explicitly disabled",
traces: &serviceApi.Traces{
SampleRatio: "0.1",
TLS: &serviceApi.TracesTLS{
Enabled: false,
},
Storage: serviceApi.TracesStorage{
Backend: "pv",
Size: "5Gi",
Retention: metav1.Duration{Duration: 90 * 24 * 60 * 60 * 1000000000},
},
},
namespace: "test-namespace",
expectedTLSEnabled: false,
expectedHTTPProtocol: "http",
expectedPVEndpoint: "http://tempo-data-science-tempomonolithic.test-namespace.svc.cluster.local:3200",
},
{
name: "S3 backend with TLS disabled (default)",
traces: &serviceApi.Traces{
SampleRatio: "0.1",
Storage: serviceApi.TracesStorage{
Backend: "s3",
Secret: "s3-secret",
Retention: metav1.Duration{Duration: 90 * 24 * 60 * 60 * 1000000000},
},
},
namespace: "test-namespace",
expectedTLSEnabled: false,
expectedHTTPProtocol: "http",
expectedS3Endpoint: "http://tempo-data-science-tempostack-gateway.test-namespace.svc.cluster.local:8080",
},
{
name: "S3 backend with TLS explicitly enabled",
traces: &serviceApi.Traces{
SampleRatio: "0.1",
TLS: &serviceApi.TracesTLS{
Enabled: true,
},
Storage: serviceApi.TracesStorage{
Backend: "s3",
Secret: "s3-secret",
Retention: metav1.Duration{Duration: 90 * 24 * 60 * 60 * 1000000000},
},
},
namespace: "test-namespace",
expectedTLSEnabled: true,
expectedHTTPProtocol: "https",
expectedS3Endpoint: "https://tempo-data-science-tempostack-gateway.test-namespace.svc.cluster.local:8080",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

templateData := make(map[string]any)
err := addTracesTemplateData(templateData, tt.traces, tt.namespace)
g.Expect(err).ShouldNot(HaveOccurred())

// Verify TLS enabled flag
tlsEnabled, exists := templateData["TempoTLSEnabled"]
g.Expect(exists).Should(BeTrue(), "TempoTLSEnabled should be set")
g.Expect(tlsEnabled).Should(Equal(tt.expectedTLSEnabled))

// Verify query endpoint URL based on backend
queryEndpoint, exists := templateData["TempoQueryEndpoint"]
g.Expect(exists).Should(BeTrue(), "TempoQueryEndpoint should be set")

switch tt.traces.Storage.Backend {
case serviceApi.StorageBackendPV:
g.Expect(queryEndpoint).Should(Equal(tt.expectedPVEndpoint))
case serviceApi.StorageBackendS3, serviceApi.StorageBackendGCS:
g.Expect(queryEndpoint).Should(Equal(tt.expectedS3Endpoint))
}

// Verify other template data fields are set
g.Expect(templateData).Should(HaveKey("OtlpEndpoint"))
g.Expect(templateData).Should(HaveKey("SampleRatio"))
g.Expect(templateData).Should(HaveKey("Backend"))
g.Expect(templateData).Should(HaveKey("TracesRetention"))
})
}
}

func TestIsLocalServiceEndpoint(t *testing.T) {
tests := []struct {
name string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,14 @@ spec:
kind: HTTPProxy
spec:
url: {{.TempoQueryEndpoint}}
{{- if .TempoTLSEnabled }}
client:
tls:
enable: true
caCert:
type: configmap
name: tempo-service-ca
certPath: service-ca.crt
{{- end }}
headers:
X-Scope-OrgID: {{.Namespace}}
Loading