-
Notifications
You must be signed in to change notification settings - Fork 6
Handling transient cases when reading a secret #1298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: rc-v0.6.7
Are you sure you want to change the base?
Changes from 4 commits
2c2c53f
54d4e6d
f8e47a9
5bf440e
372611c
42f2988
8a0e617
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| package co.worklytics.psoxy.gateway; | ||
|
|
||
| /** | ||
| * Signals that a config/secret backend had a transient failure (credential rotation, network | ||
| * blip, service hiccup) and the value may still be accessible on the next attempt. | ||
| * | ||
| * Distinct from a missing value ({@code Optional.empty()} / {@code NEGATIVE_VALUE}): callers | ||
| * should NOT treat this as "property not configured" — they should retry or serve a cached value. | ||
| */ | ||
public class TransientConfigException extends RuntimeException {

    /**
     * Pinned explicitly: RuntimeException is Serializable, and without a declared value the
     * serialized form depends on a compiler-computed default that can change between builds.
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that wraps the underlying backend failure.
     *
     * @param message description of the transient condition
     * @param cause   the error that triggered it; retained so the original stack trace is not lost
     */
    public TransientConfigException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception for a transient condition that has no underlying cause to attach.
     *
     * @param message description of the transient condition
     */
    public TransientConfigException(String message) {
        super(message);
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,21 +9,51 @@ | |
| import java.util.concurrent.TimeUnit; | ||
| import java.util.stream.Collectors; | ||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.base.Ticker; | ||
| import com.google.common.cache.CacheBuilder; | ||
| import com.google.common.cache.CacheLoader; | ||
| import com.google.common.cache.LoadingCache; | ||
| import com.google.common.util.concurrent.Futures; | ||
| import com.google.common.util.concurrent.ListenableFuture; | ||
| import com.google.common.util.concurrent.UncheckedExecutionException; | ||
| import co.worklytics.psoxy.gateway.ConfigService; | ||
| import co.worklytics.psoxy.gateway.SecretStore; | ||
| import co.worklytics.psoxy.gateway.TransientConfigException; | ||
| import co.worklytics.psoxy.gateway.WritableConfigService; | ||
| import lombok.RequiredArgsConstructor; | ||
| import lombok.SneakyThrows; | ||
| import lombok.extern.java.Log; | ||
| import org.jspecify.annotations.NonNull; | ||
|
|
||
| import java.util.logging.Level; | ||
|
aperez-worklytics marked this conversation as resolved.
|
||
|
|
||
| @RequiredArgsConstructor | ||
|
|
||
| @Log | ||
| public class CachingConfigServiceDecorator implements WritableConfigService, SecretStore { | ||
|
|
||
| static final int MAX_TRANSIENT_RETRIES = 3; | ||
| static final long DEFAULT_TRANSIENT_RETRY_DELAY_MS = 500L; | ||
|
|
||
| final ConfigService delegate; | ||
| final Duration defaultTtl; | ||
| final Ticker ticker; | ||
| final long transientRetryDelayMs; | ||
|
|
||
| public CachingConfigServiceDecorator(ConfigService delegate, Duration defaultTtl) { | ||
| this(delegate, defaultTtl, Ticker.systemTicker(), DEFAULT_TRANSIENT_RETRY_DELAY_MS); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| CachingConfigServiceDecorator(ConfigService delegate, Duration defaultTtl, Ticker ticker) { | ||
| this(delegate, defaultTtl, ticker, DEFAULT_TRANSIENT_RETRY_DELAY_MS); | ||
| } | ||
|
|
||
| @VisibleForTesting | ||
| CachingConfigServiceDecorator(ConfigService delegate, Duration defaultTtl, Ticker ticker, long transientRetryDelayMs) { | ||
| this.delegate = delegate; | ||
| this.defaultTtl = defaultTtl; | ||
| this.ticker = ticker; | ||
| this.transientRetryDelayMs = transientRetryDelayMs; | ||
| } | ||
|
|
||
| private volatile LoadingCache<ConfigProperty, String> cache; | ||
|
|
||
|
|
@@ -39,12 +69,58 @@ LoadingCache<ConfigProperty, String> getCache() { | |
| if (this.cache == null) { | ||
| this.cache = CacheBuilder.newBuilder() | ||
| .maximumSize(100) | ||
| .expireAfterWrite(defaultTtl.getSeconds(), TimeUnit.SECONDS) | ||
| .ticker(ticker) | ||
| .refreshAfterWrite(defaultTtl.getSeconds(), TimeUnit.SECONDS) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agree, this is better. |
||
| .recordStats() | ||
| .build(new CacheLoader<ConfigProperty, String>() { //req for java8-backwards compatibility | ||
| @Override | ||
| public String load(ConfigProperty key) { | ||
| return delegate.getConfigPropertyAsOptional(key).orElse(NEGATIVE_VALUE); | ||
| public String load(@NonNull ConfigProperty key) { | ||
| TransientConfigException lastException = null; | ||
|
Comment on lines
77
to
+79
|
||
| for (int attempt = 0; attempt < MAX_TRANSIENT_RETRIES; attempt++) { | ||
| if (attempt > 0) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't really get why we don't get rid of this condition and move the sleep to the end of the loop instead of the beginning. |
||
| try { | ||
| if (transientRetryDelayMs > 0) Thread.sleep(transientRetryDelayMs); | ||
| } catch (InterruptedException ie) { | ||
| Thread.currentThread().interrupt(); | ||
| throw new TransientConfigException("Config load for " + key.name() + " interrupted during retry", ie); | ||
| } | ||
| log.log(Level.WARNING, "Retrying config property {0}, attempt {1}/{2}", | ||
| new Object[]{key.name(), attempt + 1, MAX_TRANSIENT_RETRIES}); | ||
| } | ||
| try { | ||
| return delegate.getConfigPropertyAsOptional(key).orElse(NEGATIVE_VALUE); | ||
| } catch (TransientConfigException e) { | ||
| lastException = e; | ||
| log.log(Level.WARNING, "Transient failure on attempt {0}/{1} for config property {2}", | ||
| new Object[]{attempt + 1, MAX_TRANSIENT_RETRIES, key.name()}); | ||
| } | ||
| } | ||
| throw Objects.requireNonNull(lastException); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pointless, right? |
||
| } | ||
|
|
||
| @Override | ||
| public ListenableFuture<String> reload(@NonNull ConfigProperty key, @NonNull String oldValue) { | ||
| try { | ||
|
aperez-worklytics marked this conversation as resolved.
|
||
| String newValue = delegate.getConfigPropertyAsOptional(key).orElse(NEGATIVE_VALUE); | ||
| // Fallback heuristic for backends that still swallow exceptions | ||
| // (e.g. GCP SecretManagerConfigService): if the value was valid | ||
| // before but now comes back empty, assume transient and retain. | ||
| if (NEGATIVE_VALUE.equals(newValue) && !NEGATIVE_VALUE.equals(oldValue)) { | ||
| log.log(Level.WARNING, | ||
| "Backend returned empty for config property {0} which was previously set; assuming transient failure and retaining cached value", | ||
| key.name()); | ||
| return Futures.immediateFuture(oldValue); | ||
| } | ||
|
Comment on lines
+103
to
+111
|
||
| return Futures.immediateFuture(newValue); | ||
| } catch (TransientConfigException e) { | ||
| // Backend explicitly signalled a transient failure. | ||
| // Returning the old value resets the write-time so Guava waits a | ||
| // full TTL before retrying, rather than retrying on every request. | ||
| log.log(Level.WARNING, | ||
| "Transient failure reloading config property {0}; retaining cached value until next refresh cycle", | ||
| key.name()); | ||
| return Futures.immediateFuture(oldValue); | ||
| } | ||
| } | ||
| }); | ||
| } | ||
|
|
@@ -85,8 +161,22 @@ public Optional<String> getConfigPropertyAsOptional(ConfigProperty property) { | |
| } else { | ||
| return Optional.of(value); | ||
| } | ||
| } catch (UncheckedExecutionException e) { | ||
| // Guava wraps RuntimeExceptions from load() in UncheckedExecutionException. | ||
| // TransientConfigException is a RuntimeException, so it lands here. | ||
| Throwable cause = e.getCause(); | ||
| if (cause instanceof TransientConfigException) { | ||
| // load() retried MAX_TRANSIENT_RETRIES times and still failed. Nothing was | ||
| // cached, so the next request will retry immediately. Re-throw so callers can | ||
| // distinguish a transient store outage from a genuinely missing property. | ||
| log.log(Level.WARNING, | ||
| "Transient backend failure for config property {0}; all retries exhausted", | ||
| property.name()); | ||
| throw (TransientConfigException) cause; | ||
| } | ||
| throw (cause instanceof RuntimeException) ? (RuntimeException) cause : e; | ||
| } catch (ExecutionException e) { | ||
| //unwrap if possible, re-throw | ||
| // Guava wraps checked exceptions from load() in ExecutionException. | ||
| if (e.getCause() == null) { | ||
| throw e; | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| package co.worklytics.psoxy.aws; | ||
|
|
||
| import software.amazon.awssdk.awscore.exception.AwsServiceException; | ||
|
|
||
| class AwsExceptionUtils { | ||
|
|
||
| static boolean isAccessDenied(AwsServiceException e) { | ||
| if (e.awsErrorDetails() == null) { | ||
| return false; | ||
| } | ||
| String code = e.awsErrorDetails().errorCode(); | ||
| return code != null && (code.contains("AccessDenied") || code.contains("Forbidden")); | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.