RetryHelper: Include operation name and cause for all metrics
This makes the metrics more consistent.
Signed-off-by: Edwin Kempin <ekempin@google.com>
Change-Id: I9aa9f59b9b7908775d84e60a1b3d87bf5ec28e80
diff --git a/java/com/google/gerrit/server/update/RetryHelper.java b/java/com/google/gerrit/server/update/RetryHelper.java
index 3dfa088..4f9a67c 100644
--- a/java/com/google/gerrit/server/update/RetryHelper.java
+++ b/java/com/google/gerrit/server/update/RetryHelper.java
@@ -34,8 +34,6 @@
import com.google.gerrit.common.Nullable;
import com.google.gerrit.exceptions.StorageException;
import com.google.gerrit.extensions.restapi.RestApiException;
-import com.google.gerrit.metrics.Counter1;
-import com.google.gerrit.metrics.Counter2;
import com.google.gerrit.metrics.Counter3;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.Field;
@@ -128,8 +126,8 @@
@VisibleForTesting
@Singleton
public static class Metrics {
- final Counter2<ActionType, String> attemptCounts;
- final Counter1<ActionType> timeoutCount;
+ final Counter3<ActionType, String, String> attemptCounts;
+ final Counter3<ActionType, String, String> timeoutCount;
final Counter3<ActionType, String, String> autoRetryCount;
final Counter3<ActionType, String, String> failuresOnAutoRetryCount;
@@ -137,6 +135,19 @@
Metrics(MetricMaker metricMaker) {
Field<ActionType> actionTypeField =
Field.ofEnum(ActionType.class, "action_type", Metadata.Builder::actionType).build();
+ Field<String> operationNameField =
+ Field.ofString("operation_name", Metadata.Builder::operationName)
+ .description("The name of the operation that was retried.")
+ .build();
+ Field<String> lastAttemptCauseField =
+ Field.ofString("cause", Metadata.Builder::cause)
+ .description("The cause for the last attempt.")
+ .build();
+ Field<String> causeField =
+ Field.ofString("cause", Metadata.Builder::cause)
+ .description("The cause for the retry.")
+ .build();
+
attemptCounts =
metricMaker.newCounter(
"action/retry_attempt_count",
@@ -146,9 +157,8 @@
.setCumulative()
.setUnit("attempts"),
actionTypeField,
- Field.ofString("cause", Metadata.Builder::cause)
- .description("The cause for the last attempt.")
- .build());
+ operationNameField,
+ lastAttemptCauseField);
timeoutCount =
metricMaker.newCounter(
"action/retry_timeout_count",
@@ -156,7 +166,9 @@
"Number of action executions of RetryHelper that ultimately timed out")
.setCumulative()
.setUnit("timeouts"),
- actionTypeField);
+ actionTypeField,
+ operationNameField,
+ lastAttemptCauseField);
autoRetryCount =
metricMaker.newCounter(
"action/auto_retry_count",
@@ -164,12 +176,8 @@
.setCumulative()
.setUnit("retries"),
actionTypeField,
- Field.ofString("operation_name", Metadata.Builder::operationName)
- .description("The name of the operation that was retried.")
- .build(),
- Field.ofString("cause", Metadata.Builder::cause)
- .description("The cause for the retry.")
- .build());
+ operationNameField,
+ causeField);
failuresOnAutoRetryCount =
metricMaker.newCounter(
"action/failures_on_auto_retry_count",
@@ -177,12 +185,8 @@
.setCumulative()
.setUnit("failures"),
actionTypeField,
- Field.ofString("operation_name", Metadata.Builder::operationName)
- .description("The name of the operation that was retried.")
- .build(),
- Field.ofString("cause", Metadata.Builder::cause)
- .description("The cause for the retry.")
- .build());
+ operationNameField,
+ causeField);
}
}
@@ -355,12 +359,13 @@
return false;
});
retryerBuilder.withRetryListener(listener);
- return executeWithTimeoutCount(actionType, action, retryerBuilder.build());
+ return executeWithTimeoutCount(actionType, action, opts, retryerBuilder.build());
} finally {
if (listener.getAttemptCount() > 1) {
logger.atFine().log("%s was attempted %d times", actionType, listener.getAttemptCount());
metrics.attemptCounts.incrementBy(
actionType,
+ opts.caller().orElse("N/A"),
listener.getCause().map(this::formatCause).orElse("_unknown"),
listener.getAttemptCount() - 1);
}
@@ -396,18 +401,22 @@
*
* @param actionType the type of the action
* @param action the action which should be executed and retried on failure
+ * @param opts options for retrying the action on failure
* @param retryer the retryer
* @return the result of executing the action
* @throws Throwable any error or exception that made the action fail, callers are expected to
* catch and inspect this Throwable to decide carefully whether it should be re-thrown
*/
- private <T> T executeWithTimeoutCount(ActionType actionType, Action<T> action, Retryer<T> retryer)
- throws Throwable {
+ private <T> T executeWithTimeoutCount(
+ ActionType actionType, Action<T> action, Options opts, Retryer<T> retryer) throws Throwable {
try {
return retryer.call(action::call);
} catch (ExecutionException | RetryException e) {
if (e instanceof RetryException) {
- metrics.timeoutCount.increment(actionType);
+ metrics.timeoutCount.increment(
+ actionType,
+ opts.caller().orElse("N/A"),
+ e.getCause() != null ? formatCause(e.getCause()) : "_unknown");
}
if (e.getCause() != null) {
throw e.getCause();