
Commit 55cc9f1

Quantumplation authored and KtorZ committed
Wait for traces and metrics on close
Because we're using the batch provider, and span information is only sent when the span *exits*, letting the process exit immediately could lose some tracing data. The [recommended pattern](open-telemetry/opentelemetry-rust#1961 (comment)) is to hold onto the providers and shut them down manually as the process exits. This waits for any remaining spans to finish shipping and avoids losing data.

Note that we might want another pass at this in the future:

- integrate it into the panic handler that I added in another branch
- integrate something like [Tokio Graceful Shutdown](https://docs.rs/tokio-graceful-shutdown/latest/tokio_graceful_shutdown/) to intercept Ctrl+C and the like
- add a timeout, so that a stalled metrics writer doesn't wait forever (a rough sketch of this idea follows below)

I kept it simple for this PR, but it's something we should keep in mind.
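As an illustration of that last point only (this is not part of the commit): the blocking shutdown calls could be raced against a timer on a Tokio runtime. The function name and the 5-second budget below are assumptions made up for the sketch.

```rust
// Hypothetical sketch only, not part of this commit: bound telemetry teardown with
// a timeout so a stalled exporter can't hold the process open forever.
use std::time::Duration;

use opentelemetry_sdk::{metrics::SdkMeterProvider, trace::TracerProvider};

pub async fn teardown_tracing_with_timeout(tracing: TracerProvider, metrics: SdkMeterProvider) {
    // `shutdown()` blocks while buffered spans/metrics are flushed, so run it on a
    // blocking task and race it against a timer instead of calling it inline.
    let flush = tokio::task::spawn_blocking(move || {
        let _ = tracing.shutdown();
        let _ = metrics.shutdown();
    });

    if tokio::time::timeout(Duration::from_secs(5), flush).await.is_err() {
        eprintln!("timed out flushing telemetry; some spans or metrics may be lost");
    }
}
```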
1 parent e230b96

1 file changed: +44 -22

crates/amaru/src/bin/amaru/main.rs

Lines changed: 44 additions & 22 deletions
```diff
@@ -1,5 +1,7 @@
 use clap::{Parser, Subcommand};
+use miette::IntoDiagnostic;
 use opentelemetry::metrics::Counter;
+use opentelemetry_sdk::{metrics::SdkMeterProvider, trace::TracerProvider};
 use panic::panic_handler;
 use std::env;
 
@@ -32,17 +34,22 @@ struct Cli {
 async fn main() -> miette::Result<()> {
     panic_handler();
 
-    let counter = setup_tracing();
+    let (tracing, metrics, counter) = setup_tracing();
 
     let args = Cli::parse();
 
-    match args.command {
+    let result = match args.command {
         Command::Daemon(args) => cmd::daemon::run(args, counter).await,
         Command::Import(args) => cmd::import::run(args).await,
-    }
+    };
+
+    // TODO: we might also want to integrate this into a graceful shutdown system, and into a panic hook
+    teardown_tracing(tracing, metrics)?;
+
+    result
 }
 
-pub fn setup_tracing() -> Counter<u64> {
+pub fn setup_tracing() -> (TracerProvider, SdkMeterProvider, Counter<u64>) {
     use opentelemetry::{metrics::MeterProvider, trace::TracerProvider as _, KeyValue};
     use opentelemetry_sdk::{metrics::Temporality, Resource};
     use tracing_subscriber::{prelude::*, *};
@@ -72,22 +79,19 @@ pub fn setup_tracing() -> Counter<u64> {
     let resource = Resource::new(vec![KeyValue::new("service.name", SERVICE_NAME)]);
 
     // Traces & span
-    let opentelemetry_layer = tracing_opentelemetry::layer()
-        .with_tracer(
-            opentelemetry_sdk::trace::TracerProvider::builder()
-                .with_resource(resource.clone())
-                .with_batch_exporter(
-                    opentelemetry_otlp::SpanExporter::builder()
-                        .with_tonic()
-                        .build()
-                        .unwrap_or_else(|e| {
-                            panic!("failed to setup opentelemetry span exporter: {e}")
-                        }),
-                    opentelemetry_sdk::runtime::Tokio,
-                )
+    let opentelemetry_provider = opentelemetry_sdk::trace::TracerProvider::builder()
+        .with_resource(resource.clone())
+        .with_batch_exporter(
+            opentelemetry_otlp::SpanExporter::builder()
+                .with_tonic()
                 .build()
-                .tracer(SERVICE_NAME),
+                .unwrap_or_else(|e| panic!("failed to setup opentelemetry span exporter: {e}")),
+            opentelemetry_sdk::runtime::Tokio,
         )
+        .build();
+    let opentelemetry_tracer = opentelemetry_provider.tracer(SERVICE_NAME);
+    let opentelemetry_layer = tracing_opentelemetry::layer()
+        .with_tracer(opentelemetry_tracer)
         .with_filter(filter(AMARU_LOG));
 
     // Metrics
@@ -105,14 +109,14 @@ pub fn setup_tracing() -> Counter<u64> {
     )
     .build();
 
-    let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
+    let metrics_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
         .with_reader(metric_reader)
         .with_resource(resource)
         .build();
 
-    let meter = provider.meter("amaru");
+    let meter = metrics_provider.meter("amaru");
 
-    opentelemetry::global::set_meter_provider(provider);
+    opentelemetry::global::set_meter_provider(metrics_provider.clone());
 
     // Subscriber
     tracing_subscriber::registry()
@@ -127,5 +131,23 @@ pub fn setup_tracing() -> Counter<u64> {
 
     let counter = meter.u64_counter("block.count").build();
 
-    counter
+    (opentelemetry_provider, metrics_provider, counter)
+}
+
+pub fn teardown_tracing(tracing: TracerProvider, metrics: SdkMeterProvider) -> miette::Result<()> {
+    // Shut down the providers so that it flushes any remaining spans
+    // TODO: we might also want to wrap this in a timeout, so we don't hold the process open forever?
+    tracing.shutdown().into_diagnostic()?;
+    metrics.shutdown().into_diagnostic()?;
+
+    // This appears to be a deprecated method that will be removed soon
+    // and just *releases* a reference to it, but doesn't actually call shutdown
+    // still, we call it just in case until it gets removed
+    // See:
+    // https://github.com/tokio-rs/tracing-opentelemetry/issues/159
+    // https://github.com/tokio-rs/tracing-opentelemetry/pull/175
+    // https://github.com/open-telemetry/opentelemetry-rust/issues/1961
+    opentelemetry::global::shutdown_tracer_provider();
+
+    Ok(())
 }
```
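The panic-handler integration mentioned in the commit message is likewise deferred. Purely as a hypothetical sketch (the hook function below is an assumption, not Amaru code), a best-effort flush could be chained onto the existing panic hook, relying on both providers being cheap to clone:

```rust
// Hypothetical sketch only, not part of this commit: best-effort telemetry flush
// from a panic hook, so spans from a panicking process still get shipped.
use opentelemetry_sdk::{metrics::SdkMeterProvider, trace::TracerProvider};

fn install_telemetry_panic_hook(tracing: TracerProvider, metrics: SdkMeterProvider) {
    // Chain onto the existing hook so the usual panic message is still printed.
    let previous_hook = std::panic::take_hook();
    std::panic::set_hook(Box::new(move |info| {
        previous_hook(info);
        // Best-effort flush; errors are ignored because we're already panicking.
        let _ = tracing.shutdown();
        let _ = metrics.shutdown();
    }));
}
```

In practice this would need to be reconciled with the `panic_handler()` call already made in `main()`, which is part of why the commit keeps things simple for now.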
