blob: 4cbaffdc76871b9682463ce8293b62cefd005ac7 [file] [log] [blame]
// Copyright (C) 2017 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.update;
import static com.google.common.base.MoreObjects.firstNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import com.github.rholder.retry.Attempt;
import com.github.rholder.retry.RetryException;
import com.github.rholder.retry.RetryListener;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;
import com.github.rholder.retry.WaitStrategy;
import com.google.auto.value.AutoValue;
import com.google.common.base.Throwables;
import com.google.gerrit.common.Nullable;
import com.google.gerrit.extensions.restapi.RestApiException;
import com.google.gerrit.metrics.Counter0;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.Histogram0;
import com.google.gerrit.metrics.MetricMaker;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.LockFailureException;
import com.google.gerrit.server.notedb.NotesMigration;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.time.Duration;
import java.util.concurrent.ExecutionException;
import org.eclipse.jgit.lib.Config;
@Singleton
public class RetryHelper {
public interface Action<T> {
T call(BatchUpdate.Factory updateFactory) throws Exception;
}
/**
* Options for retrying a single operation.
*
* <p>This class is similar in function to upstream's {@link RetryerBuilder}, but it exists as its
* own class in Gerrit for several reasons:
*
* <ul>
* <li>Gerrit needs to support defaults for some of the options, such as a default timeout.
* {@code RetryerBuilder} doesn't support calling the same setter multiple times, so doing
* this with {@code RetryerBuilder} directly would not be easy.
* <li>Gerrit explicitly does not want callers to have full control over all possible options,
* so this class exposes a curated subset.
* </ul>
*/
@AutoValue
public abstract static class Options {
@Nullable
abstract RetryListener listener();
@Nullable
abstract Duration timeout();
@AutoValue.Builder
public abstract static class Builder {
public abstract Builder listener(RetryListener listener);
public abstract Builder timeout(Duration timeout);
public abstract Options build();
}
}
@Singleton
private static class Metrics {
final Histogram0 attemptCounts;
final Counter0 timeoutCount;
@Inject
Metrics(MetricMaker metricMaker) {
attemptCounts =
metricMaker.newHistogram(
"batch_update/retry_attempt_counts",
new Description(
"Distribution of number of attempts made by RetryHelper"
+ " (1 == single attempt, no retry)")
.setCumulative()
.setUnit("attempts"));
timeoutCount =
metricMaker.newCounter(
"batch_update/retry_timeout_count",
new Description("Number of executions of RetryHelper that ultimately timed out")
.setCumulative()
.setUnit("timeouts"));
}
}
public static Options.Builder options() {
return new AutoValue_RetryHelper_Options.Builder();
}
public static Options defaults() {
return options().build();
}
private final NotesMigration migration;
private final Metrics metrics;
private final BatchUpdate.Factory updateFactory;
private final Duration defaultTimeout;
private final WaitStrategy waitStrategy;
@Inject
RetryHelper(
@GerritServerConfig Config cfg,
Metrics metrics,
NotesMigration migration,
ReviewDbBatchUpdate.AssistedFactory reviewDbBatchUpdateFactory,
NoteDbBatchUpdate.AssistedFactory noteDbBatchUpdateFactory) {
this.metrics = metrics;
this.migration = migration;
this.updateFactory =
new BatchUpdate.Factory(migration, reviewDbBatchUpdateFactory, noteDbBatchUpdateFactory);
this.defaultTimeout =
Duration.ofMillis(
cfg.getTimeUnit("noteDb", null, "retryTimeout", SECONDS.toMillis(20), MILLISECONDS));
this.waitStrategy =
WaitStrategies.join(
WaitStrategies.exponentialWait(
cfg.getTimeUnit("noteDb", null, "retryMaxWait", SECONDS.toMillis(5), MILLISECONDS),
MILLISECONDS),
WaitStrategies.randomWait(50, MILLISECONDS));
}
public Duration getDefaultTimeout() {
return defaultTimeout;
}
public <T> T execute(Action<T> action) throws RestApiException, UpdateException {
return execute(action, defaults());
}
public <T> T execute(Action<T> action, Options opts) throws RestApiException, UpdateException {
MetricListener listener = null;
try {
RetryerBuilder<T> builder = RetryerBuilder.newBuilder();
if (migration.disableChangeReviewDb()) {
listener = new MetricListener(opts.listener());
builder
.withRetryListener(listener)
.withStopStrategy(
StopStrategies.stopAfterDelay(
firstNonNull(opts.timeout(), defaultTimeout).toMillis(), MILLISECONDS))
.withWaitStrategy(waitStrategy)
.retryIfException(RetryHelper::isLockFailure);
} else {
// Either we aren't full-NoteDb, or the underlying ref storage doesn't support atomic
// transactions. Either way, retrying a partially-failed operation is not idempotent, so
// don't do it automatically. Let the end user decide whether they want to retry.
}
return builder.build().call(() -> action.call(updateFactory));
} catch (ExecutionException | RetryException e) {
if (e instanceof RetryException) {
metrics.timeoutCount.increment();
}
if (e.getCause() != null) {
Throwables.throwIfInstanceOf(e.getCause(), UpdateException.class);
Throwables.throwIfInstanceOf(e.getCause(), RestApiException.class);
}
throw new UpdateException(e);
} finally {
if (listener != null) {
metrics.attemptCounts.record(listener.getAttemptCount());
}
}
}
private static boolean isLockFailure(Throwable t) {
if (t instanceof UpdateException) {
t = t.getCause();
}
return t instanceof LockFailureException;
}
private static class MetricListener implements RetryListener {
private final RetryListener delegate;
private long attemptCount;
MetricListener(@Nullable RetryListener delegate) {
this.delegate = delegate;
attemptCount = 1;
}
@Override
public <V> void onRetry(Attempt<V> attempt) {
attemptCount = attempt.getAttemptNumber();
if (delegate != null) {
delegate.onRetry(attempt);
}
}
long getAttemptCount() {
return attemptCount;
}
}
}