Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -184,26 +184,42 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult:
serialized_data = encode_logs(batch).SerializeToString()
deadline_sec = time() + self._timeout
for retry_num in range(_MAX_RETRYS):
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return LogExportResult.SUCCESS
# multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff.
backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
try:
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return LogExportResult.SUCCESS
except requests.exceptions.RequestException as error:
reason = str(error)
retryable = True
status_code = None
else:
reason = resp.reason
retryable = _is_retryable(resp)
status_code = resp.status_code

if not retryable:
_logger.error(
"Failed to export logs batch code: %s, reason: %s",
status_code,
reason,
)
return LogExportResult.FAILURE

if (
not _is_retryable(resp)
or retry_num + 1 == _MAX_RETRYS
retry_num + 1 == _MAX_RETRYS
or backoff_seconds > (deadline_sec - time())
or self._shutdown
):
_logger.error(
"Failed to export logs batch code: %s, reason: %s",
resp.status_code,
resp.text,
"Failed to export logs batch due to timeout,"
"max retries or shutdown."
)
return LogExportResult.FAILURE
_logger.warning(
"Transient error %s encountered while exporting logs batch, retrying in %.2fs.",
resp.reason,
reason,
backoff_seconds,
)
shutdown = self._shutdown_is_occuring.wait(backoff_seconds)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,26 +231,41 @@ def export(
serialized_data = encode_metrics(metrics_data).SerializeToString()
deadline_sec = time() + self._timeout
for retry_num in range(_MAX_RETRYS):
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return MetricExportResult.SUCCESS
# multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff.
backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
try:
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return MetricExportResult.SUCCESS
except requests.exceptions.RequestException as error:
reason = str(error)
retryable = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe only mark it as retryable if it's a connection error to start out? Otherwise this looks good to me. Also, can we add a test that exercises this branch in the code...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you want me to catch the ConnectionError? :D

For testing, as I already said, I have a really hard time understanding what's going on :D So I would appreciate some help.

Copy link
Contributor

@DylanRussell DylanRussell Nov 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I meant only make Connection error retryable, but leave the broad requests exception catch as you have it..

For testing you just need to add 1 test (maybe 2 if you special case Connection error as retryable to exercise that logic). Here is the span exporter test: https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py#L281-L304 -- you'll want to add basically duplicate tests to each of the 3 http exporters.. You can set up the mock post call to raise an exception (mock_post.side_effect = RequestsException), see (https://github.com/open-telemetry/opentelemetry-python/blob/main/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py#L288C9-L288C38)

In order to run the tests, make sure you have uv installed. Then run uv sync at the top-level directory (opentelemetry-python). Then activate the venv that uv creates (source .venv/bin/activate in the same directory). Then you can run the unit tests via uv with tox -e py312-test-opentelemetry-exporter-otlp-proto-http -- you can run them for a different version of python too (run tox -l | grep http to see all python versions the tests can run against). You can also have uv install a python version if you haven't done that -- check out the docs for the uv tool.

status_code = None
else:
reason = resp.reason
retryable = _is_retryable(resp)
status_code = resp.status_code

if not retryable:
_logger.error(
"Failed to export metrics batch code: %s, reason: %s",
status_code,
reason,
)
return MetricExportResult.FAILURE
if (
not _is_retryable(resp)
or retry_num + 1 == _MAX_RETRYS
retry_num + 1 == _MAX_RETRYS
or backoff_seconds > (deadline_sec - time())
or self._shutdown
):
_logger.error(
"Failed to export metrics batch code: %s, reason: %s",
resp.status_code,
resp.text,
"Failed to export metrics batch due to timeout,"
"max retries or shutdown."
)
return MetricExportResult.FAILURE
_logger.warning(
"Transient error %s encountered while exporting metrics batch, retrying in %.2fs.",
resp.reason,
reason,
backoff_seconds,
)
shutdown = self._shutdown_in_progress.wait(backoff_seconds)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,26 +179,42 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
serialized_data = encode_spans(spans).SerializePartialToString()
deadline_sec = time() + self._timeout
for retry_num in range(_MAX_RETRYS):
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return SpanExportResult.SUCCESS
# multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff.
backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
try:
resp = self._export(serialized_data, deadline_sec - time())
if resp.ok:
return SpanExportResult.SUCCESS
except requests.exceptions.RequestException as error:
reason = str(error)
retryable = True
status_code = None
else:
reason = resp.reason
retryable = _is_retryable(resp)
status_code = resp.status_code

if not retryable:
_logger.error(
"Failed to export span batch code: %s, reason: %s",
status_code,
reason,
)
return SpanExportResult.FAILURE

if (
not _is_retryable(resp)
or retry_num + 1 == _MAX_RETRYS
retry_num + 1 == _MAX_RETRYS
or backoff_seconds > (deadline_sec - time())
or self._shutdown
):
_logger.error(
"Failed to export span batch code: %s, reason: %s",
resp.status_code,
resp.text,
"Failed to export span batch due to timeout,"
"max retries or shutdown."
)
return SpanExportResult.FAILURE
_logger.warning(
"Transient error %s encountered while exporting span batch, retrying in %.2fs.",
resp.reason,
reason,
backoff_seconds,
)
shutdown = self._shutdown_in_progress.wait(backoff_seconds)
Expand Down
Loading