Skip to content
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
b066750
Using otlphttp exporter for self metrics
rafaelwestphal Oct 15, 2025
3e29132
Adding the necessary transformers for agent metric
rafaelwestphal Oct 16, 2025
0969945
Removing extensions when unnecessary
rafaelwestphal Oct 16, 2025
06ae486
Include selfmetrics
rafaelwestphal Oct 17, 2025
6b8674d
Fixing golden
rafaelwestphal Oct 17, 2025
8901822
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Oct 17, 2025
486f335
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Oct 21, 2025
2ca0535
Merging file
rafaelwestphal Oct 22, 2025
68f9c4d
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Oct 22, 2025
27f1487
Fix lint
rafaelwestphal Oct 22, 2025
7c6ab48
Refactoring how to run featureflag tests
rafaelwestphal Oct 23, 2025
0203366
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Oct 31, 2025
fb747cc
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 4, 2025
3d60aa6
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 5, 2025
75b3426
Reusing some processors
rafaelwestphal Nov 5, 2025
dad0225
Refactoring code and removing comments
rafaelwestphal Nov 12, 2025
d06c53e
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 12, 2025
4b1d06d
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 12, 2025
958fb37
Copying host.id to instance_id to bypass bug 459468648
rafaelwestphal Nov 12, 2025
49d5d56
Updating golden
rafaelwestphal Nov 13, 2025
0f1b0d2
Fixing transformation test
rafaelwestphal Nov 13, 2025
72f0623
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 19, 2025
193dd21
Adding system specific changes
rafaelwestphal Nov 19, 2025
edfe554
Formatting
rafaelwestphal Nov 19, 2025
db9427a
Readjust when the type changes from double to int
rafaelwestphal Nov 27, 2025
a87cb54
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 27, 2025
87f07ee
update golden
rafaelwestphal Nov 27, 2025
d85b391
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Nov 28, 2025
aab9b17
Fixing merge
rafaelwestphal Nov 28, 2025
ecbc31f
Removing ProjectName since we are obtaining it inside the aux function
rafaelwestphal Nov 28, 2025
939da8e
Removing unnecessary ProjectName
rafaelwestphal Dec 1, 2025
9f32772
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Dec 1, 2025
460ca97
update golden
rafaelwestphal Dec 1, 2025
1cf2746
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Dec 5, 2025
06a6d3f
Reverting change to scalar transformation
rafaelwestphal Dec 5, 2025
765ab8d
Add the other changes
rafaelwestphal Dec 5, 2025
3b0a878
Reverting bad scalar changes
rafaelwestphal Dec 5, 2025
0d45cb4
Adding metricstarttime when using otlphttp
rafaelwestphal Dec 8, 2025
d4224ac
Merge branch 'master' into westphalrafael-agent-otlp
rafaelwestphal Dec 8, 2025
936ae3d
Adding function
rafaelwestphal Dec 8, 2025
d387e0a
Added metricstarttime exporter to the end of the pipelines
rafaelwestphal Dec 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
47 changes: 27 additions & 20 deletions confgenerator/agentmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package confgenerator

import (
"context"
"fmt"
"path/filepath"
"time"
Expand Down Expand Up @@ -56,9 +57,9 @@ var grpcToHTTPStatus = map[string]string{
"DEADLINE_EXCEEDED": "504",
}

func (r AgentSelfMetrics) AddSelfMetricsPipelines(receiverPipelines map[string]otel.ReceiverPipeline, pipelines map[string]otel.Pipeline) {
func (r AgentSelfMetrics) AddSelfMetricsPipelines(receiverPipelines map[string]otel.ReceiverPipeline, pipelines map[string]otel.Pipeline, ctx context.Context) {
// Receiver pipelines names should have 1 underscore to avoid collision with user configurations.
receiverPipelines["agent_prometheus"] = r.PrometheusMetricsPipeline()
receiverPipelines["agent_prometheus"] = r.PrometheusMetricsPipeline(ctx)

// Pipeline names should have no underscores to avoid collision with user configurations.
pipelines["otel"] = otel.Pipeline{
Expand All @@ -79,15 +80,15 @@ func (r AgentSelfMetrics) AddSelfMetricsPipelines(receiverPipelines map[string]o
Processors: r.LoggingMetricsPipelineProcessors(),
}

receiverPipelines["ops_agent"] = r.OpsAgentPipeline()
receiverPipelines["ops_agent"] = r.OpsAgentPipeline(ctx)
pipelines["opsagent"] = otel.Pipeline{
Type: "metrics",
ReceiverPipelineName: "ops_agent",
}
}

func (r AgentSelfMetrics) PrometheusMetricsPipeline() otel.ReceiverPipeline {
return otel.ReceiverPipeline{
func (r AgentSelfMetrics) PrometheusMetricsPipeline(ctx context.Context) otel.ReceiverPipeline {
return ConvertGCMSystemExporterToOtlpExporter(otel.ReceiverPipeline{
Receiver: otel.Component{
Type: "prometheus",
Config: map[string]interface{}{
Expand Down Expand Up @@ -128,7 +129,7 @@ func (r AgentSelfMetrics) PrometheusMetricsPipeline() otel.ReceiverPipeline {
),
},
},
}
}, ctx)
}

func (r AgentSelfMetrics) OtelPipelineProcessors() []otel.Component {
Expand Down Expand Up @@ -218,20 +219,14 @@ func (r AgentSelfMetrics) LoggingMetricsPipelineProcessors() []otel.Component {
// Format fluentbit and otel logging metrics before aggregation.
otel.MetricsTransform(
otel.RenameMetric("fluentbit_stackdriver_retried_records_total", "fluentbit_log_entry_retry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.RenameLabel("status", "response_code"),
otel.AggregateLabels("sum", "response_code"),
),
otel.DuplicateMetric("otelcol_exporter_send_failed_log_records", "otel_log_entry_retry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.AddLabel("response_code", "400"),
otel.AggregateLabels("sum", "response_code"),
),
otel.RenameMetric("fluentbit_stackdriver_requests_total", "fluentbit_request_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.RenameLabel("status", "response_code"),
otel.AggregateLabels("sum", "response_code"),
),
Expand All @@ -243,19 +238,16 @@ func (r AgentSelfMetrics) LoggingMetricsPipelineProcessors() []otel.Component {
),
otel.RenameMetric("fluentbit_stackdriver_proc_records_total", "fluentbit_log_entry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.RenameLabel("status", "response_code"),
otel.AggregateLabels("sum", "response_code"),
),
otel.RenameMetric("otelcol_exporter_sent_log_records", "otel_log_entry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.AddLabel("response_code", "200"),
otel.AggregateLabels("sum", "response_code"),
),
otel.RenameMetric("otelcol_exporter_send_failed_log_records", "otel_log_entry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
otel.AddLabel("response_code", "400"),
otel.AggregateLabels("sum", "response_code"),
),
Expand Down Expand Up @@ -297,24 +289,39 @@ func (r AgentSelfMetrics) LoggingMetricsPipelineProcessors() []otel.Component {
// DeltaToCumulative keeps in memory information of previous delta points
// to generate a valid cumulative monotonic metric.
otel.DeltaToCumulative(),
otel.MetricStartTime(),
otel.MetricsTransform(
otel.UpdateMetric("agent/log_entry_retry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
),
otel.UpdateMetric("agent/request_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
),
otel.UpdateMetric("agent/log_entry_count",
// change data type from double -> int64
otel.ToggleScalarDataType,
),
),
// The processor "interval" outputs the last point in each 1 minute interval.
otel.Interval("1m"),
otel.MetricsTransform(otel.AddPrefix("agent.googleapis.com")),
}
}

func (r AgentSelfMetrics) OpsAgentPipeline() otel.ReceiverPipeline {
receiver_config := map[string]any{
func (r AgentSelfMetrics) OpsAgentPipeline(ctx context.Context) otel.ReceiverPipeline {
receiverConfig := map[string]any{
"include": []string{
filepath.Join(r.OtelRuntimeDir, "enabled_receivers_otlp.json"),
filepath.Join(r.OtelRuntimeDir, "feature_tracking_otlp.json")},
"replay_file": true,
"poll_interval": time.Duration(60 * time.Second).String(),
}
return otel.ReceiverPipeline{
return ConvertGCMSystemExporterToOtlpExporter(otel.ReceiverPipeline{
Receiver: otel.Component{
Type: "otlpjsonfile",
Config: receiver_config,
Config: receiverConfig,
},
ExporterTypes: map[string]otel.ExporterType{
"metrics": otel.System,
Expand All @@ -324,7 +331,7 @@ func (r AgentSelfMetrics) OpsAgentPipeline() otel.ReceiverPipeline {
otel.Transform("metric", "datapoint", []ottl.Statement{"set(time, Now())"}),
},
},
}
}, ctx)
}

// intentionally not registered as a component because this is not created by users
38 changes: 23 additions & 15 deletions confgenerator/confgenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,32 +57,40 @@ func googleCloudExporter(userAgent string, instrumentationLabels bool, serviceRe
}
}

func ConvertPrometheusExporterToOtlpExporter(receiver otel.ReceiverPipeline, ctx context.Context) otel.ReceiverPipeline {
return ConvertToOtlpExporter(receiver, ctx, true)
func ConvertPrometheusExporterToOtlpExporter(pipeline otel.ReceiverPipeline, ctx context.Context) otel.ReceiverPipeline {
return ConvertToOtlpExporter(pipeline, ctx, true, false)
}

func ConvertGCMOtelExporterToOtlpExporter(receiver otel.ReceiverPipeline, ctx context.Context) otel.ReceiverPipeline {
return ConvertToOtlpExporter(receiver, ctx, false)
func ConvertGCMOtelExporterToOtlpExporter(pipeline otel.ReceiverPipeline, ctx context.Context) otel.ReceiverPipeline {
return ConvertToOtlpExporter(pipeline, ctx, false, false)
}

func ConvertToOtlpExporter(receiver otel.ReceiverPipeline, ctx context.Context, isPrometheus bool) otel.ReceiverPipeline {
// ConvertGCMSystemExporterToOtlpExporter forwards pipeline and ctx to
// ConvertToOtlpExporter with isPrometheus disabled and isSystem enabled, and
// returns whatever pipeline that call produces.
func ConvertGCMSystemExporterToOtlpExporter(pipeline otel.ReceiverPipeline, ctx context.Context) otel.ReceiverPipeline {
	// Named flags make the positional boolean arguments readable at the call.
	const (
		isPrometheus = false
		isSystem     = true
	)
	return ConvertToOtlpExporter(pipeline, ctx, isPrometheus, isSystem)
}

func ConvertToOtlpExporter(pipeline otel.ReceiverPipeline, ctx context.Context, isPrometheus bool, isSystem bool) otel.ReceiverPipeline {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is great that all the logic to set up the OTLP exporter instead of the GCM exporter is now contained within ConvertToOtlpExporter!

Question

I want to follow up on this comment made above:

The problem is that the transformation is different depending on whether the current exporter is configured as a system exporter or not. If I just declare it as OTLP, I don't know how to properly set up the pipeline later. It can be either a GMP, GCM system or GCM OTel exporter.

When the migration to the OTLP exporter is fully completed and we remove all the other exporters (System, OTel, GMP):

  • How are we going to determine which combination of processors (isSystem, isPrometheus) should be applied to each metric pipeline?
  • Should we add another ReceiverPipeline "property" to differentiate them?
  • When we remove the GMP exporter, how is the function ConvertPrometheusExporterToOtlpExporter going to be renamed/refactored?

I bring up these questions because I feel that the solution of using a "converter" (ConvertToOtlpExporter) considers only the intermediate state where GCM is the main exporter, and will need further refactoring when the other exporters are removed. My suggestion here aims to find a solution that won't need any refactoring later.

Suggestion

One idea is to have 3 exporter types which help determine the specific processors needed for each pipeline (while keeping the possibility to store the logic in one place):

  • System_OTLP
  • GMP_OTLP
  • OTLP

This keeps the abstraction of a "ReceiverPipeline" while also having a way to differentiate the specific requirements of each metric pipeline.

Copy link
Contributor

@franciscovalentecastro franciscovalentecastro Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Synced offline with @rafaelwestphal. It was mentioned that eventually we are going to consolidate all OTLP exporter processor logic when the migration is complete. Using ConvertToOtlpExporter is considered an intermediate step, since it enables development to happen for each receiver separately.

expOtlpExporter := experimentsFromContext(ctx)["otlp_exporter"]
resource, _ := platform.FromContext(ctx).GetResource()
if !expOtlpExporter {
return receiver
return pipeline
}
_, err := receiver.ExporterTypes["metrics"]
_, err := pipeline.ExporterTypes["metrics"]
if !err {
return receiver
return pipeline
}
receiver.ExporterTypes["metrics"] = otel.OTLP
pipeline.ExporterTypes["metrics"] = otel.OTLP

receiver.Processors["metrics"] = append(receiver.Processors["metrics"], otel.GCPProjectID(resource.ProjectName()))
pipeline.Processors["metrics"] = append(pipeline.Processors["metrics"], otel.GCPProjectID(resource.ProjectName()))
if isSystem {
pipeline.Processors["metrics"] = append(pipeline.Processors["metrics"], otel.MetricStartTime())
pipeline.Processors["metrics"] = append(pipeline.Processors["metrics"], otel.MetricsRemoveInstrumentationLibraryLabelsAttributes())
}
if isPrometheus {
receiver.Processors["metrics"] = append(receiver.Processors["metrics"], otel.MetricUnknownCounter())
receiver.Processors["metrics"] = append(receiver.Processors["metrics"], otel.MetricStartTime())
pipeline.Processors["metrics"] = append(pipeline.Processors["metrics"], otel.MetricUnknownCounter())
pipeline.Processors["metrics"] = append(pipeline.Processors["metrics"], otel.MetricStartTime())
}
return receiver
return pipeline
}

func otlpExporter(userAgent string) otel.Component {
Expand Down Expand Up @@ -127,6 +135,7 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir string)
userAgent, _ := p.UserAgent("Google-Cloud-Ops-Agent-Metrics")
metricVersionLabel, _ := p.VersionLabel("google-cloud-ops-agent-metrics")
loggingVersionLabel, _ := p.VersionLabel("google-cloud-ops-agent-logging")
expOtlpExporter := experimentsFromContext(ctx)["otlp_exporter"]

receiverPipelines, pipelines, err := uc.generateOtelPipelines(ctx)
if err != nil {
Expand All @@ -141,9 +150,8 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir string)
OtelRuntimeDir: outDir,
OtelLogging: uc.Logging.Service.OTelLogging,
}
agentSelfMetrics.AddSelfMetricsPipelines(receiverPipelines, pipelines)
agentSelfMetrics.AddSelfMetricsPipelines(receiverPipelines, pipelines, ctx)

expOtlpExporter := experimentsFromContext(ctx)["otlp_exporter"]
extensions := map[string]interface{}{}
if expOtlpExporter {
extensions["googleclientauth"] = map[string]interface{}{}
Expand Down
7 changes: 7 additions & 0 deletions confgenerator/otel/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,13 @@ func MetricsRemoveServiceAttributes() Component {
}
}

// MetricsRemoveInstrumentationLibraryLabelsAttributes returns the Component
// built by TransformationMetrics from SetScopeName("") and
// SetScopeVersion(""), i.e. both the scope name and the scope version are set
// to the empty string.
// NOTE(review): presumably blanking the scope is what keeps the
// instrumentation library labels off the exported metrics; confirm.
func MetricsRemoveInstrumentationLibraryLabelsAttributes() Component {
	blankScopeName := SetScopeName("")
	blankScopeVersion := SetScopeVersion("")
	return TransformationMetrics(blankScopeName, blankScopeVersion)
}

func CopyHostIDToInstanceID() Component {
return Component{
Type: "transform",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ processors:
- otelcol_process_memory_rss
- grpc.client.attempt.duration_count
- googlecloudmonitoring/point_count
interval/loggingmetrics_7:
interval/loggingmetrics_9:
interval: 1m
metricstarttime/loggingmetrics_7:
strategy: subtract_initial_point
metricstransform/fluentbit_1:
transforms:
- action: update
Expand Down Expand Up @@ -384,13 +386,18 @@ processors:
include: ^(.*)$$
match_type: regexp
new_name: agent.googleapis.com/$${1}
metricstransform/loggingmetrics_10:
transforms:
- action: update
include: ^(.*)$$
match_type: regexp
new_name: agent.googleapis.com/$${1}
metricstransform/loggingmetrics_3:
transforms:
- action: update
include: fluentbit_stackdriver_retried_records_total
new_name: fluentbit_log_entry_retry_count
operations:
- action: toggle_scalar_data_type
- action: update_label
label: status
new_label: response_code
Expand All @@ -402,7 +409,6 @@ processors:
include: otelcol_exporter_send_failed_log_records
new_name: otel_log_entry_retry_count
operations:
- action: toggle_scalar_data_type
- action: add_label
new_label: response_code
new_value: "400"
Expand All @@ -414,7 +420,6 @@ processors:
include: fluentbit_stackdriver_requests_total
new_name: fluentbit_request_count
operations:
- action: toggle_scalar_data_type
- action: update_label
label: status
new_label: response_code
Expand Down Expand Up @@ -474,7 +479,6 @@ processors:
include: fluentbit_stackdriver_proc_records_total
new_name: fluentbit_log_entry_count
operations:
- action: toggle_scalar_data_type
- action: update_label
label: status
new_label: response_code
Expand All @@ -486,7 +490,6 @@ processors:
include: otelcol_exporter_sent_log_records
new_name: otel_log_entry_count
operations:
- action: toggle_scalar_data_type
- action: add_label
new_label: response_code
new_value: "200"
Expand All @@ -498,7 +501,6 @@ processors:
include: otelcol_exporter_send_failed_log_records
new_name: otel_log_entry_count
operations:
- action: toggle_scalar_data_type
- action: add_label
new_label: response_code
new_value: "400"
Expand All @@ -519,9 +521,17 @@ processors:
metricstransform/loggingmetrics_8:
transforms:
- action: update
include: ^(.*)$$
match_type: regexp
new_name: agent.googleapis.com/$${1}
include: agent/log_entry_retry_count
operations:
- action: toggle_scalar_data_type
- action: update
include: agent/request_count
operations:
- action: toggle_scalar_data_type
- action: update
include: agent/log_entry_count
operations:
- action: toggle_scalar_data_type
metricstransform/otel_3:
transforms:
- action: update
Expand Down Expand Up @@ -692,8 +702,10 @@ service:
- cumulativetodelta/loggingmetrics_4
- transform/loggingmetrics_5
- deltatocumulative/loggingmetrics_6
- interval/loggingmetrics_7
- metricstarttime/loggingmetrics_7
- metricstransform/loggingmetrics_8
- interval/loggingmetrics_9
- metricstransform/loggingmetrics_10
- resourcedetection/_global_0
receivers:
- prometheus/agent_prometheus
Expand Down
Loading
Loading