Fix issue with URI lock release after replication task cancellation
PushOne.isRetrying will always returns true even if the task is already
cancelled. That creates a situation where canceled tasks are never
releasing URI locks and blocks other tasks to be replicated to
that URI.
Bug: Issue 12940
Change-Id: I7f181a368305b3153fb6ecfe6e1387eb83fdacb5
diff --git a/BUILD b/BUILD
index 50615d8..14b8876 100644
--- a/BUILD
+++ b/BUILD
@@ -21,6 +21,7 @@
junit_tests(
name = "replication_tests",
+ timeout = "long",
srcs = glob([
"src/test/java/**/*Test.java",
"src/test/java/**/*IT.java",
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
index 6907ad0..936bb2c 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
@@ -561,6 +561,7 @@
pool.schedule(pushOp, config.getRetryDelay(), TimeUnit.MINUTES);
} else {
pushOp.canceledByReplication();
+ pushOp.retryDone();
pending.remove(uri);
stateLog.error(
"Push to " + pushOp.getURI() + " cancelled after maximum number of retries",
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
index eff06ce..9ccb70c 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
@@ -219,7 +219,7 @@
return maxRetries == 0 || retryCount <= maxRetries;
}
- private void retryDone() {
+ void retryDone() {
this.retrying = false;
}
diff --git a/src/test/java/com/googlesource/gerrit/plugins/replication/ReplicationIT.java b/src/test/java/com/googlesource/gerrit/plugins/replication/ReplicationIT.java
index 61d0b9b..6ad5558 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/replication/ReplicationIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/replication/ReplicationIT.java
@@ -35,6 +35,8 @@
import com.google.gerrit.server.config.SitePaths;
import com.google.inject.Inject;
import com.google.inject.Key;
+import com.googlesource.gerrit.plugins.replication.Destination.QueueInfo;
+import com.googlesource.gerrit.plugins.replication.ReplicationConfig.FilterType;
import com.googlesource.gerrit.plugins.replication.ReplicationTasksStorage.ReplicateRefUpdate;
import java.io.IOException;
import java.nio.file.DirectoryStream;
@@ -51,6 +53,7 @@
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.storage.file.FileBasedConfig;
+import org.eclipse.jgit.transport.URIish;
import org.eclipse.jgit.util.FS;
import org.junit.Test;
@@ -63,12 +66,19 @@
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
private static final int TEST_REPLICATION_DELAY = 1;
private static final int TEST_REPLICATION_RETRY = 1;
+ private static final int TEST_REPLICATION_MAX_RETRIES = 1;
private static final Duration TEST_TIMEOUT =
Duration.ofSeconds((TEST_REPLICATION_DELAY + TEST_REPLICATION_RETRY * 60) + 1);
+ private static final Duration MAX_RETRY_WITH_TOLERANCE_TIMEOUT =
+ Duration.ofSeconds(
+ (TEST_REPLICATION_DELAY + TEST_REPLICATION_RETRY * 60) * TEST_REPLICATION_MAX_RETRIES
+ + 10);
+
@Inject private SitePaths sitePaths;
@Inject private ProjectOperations projectOperations;
@Inject private DynamicSet<ProjectDeletedListener> deletedListeners;
+ private DestinationsCollection destinationCollection;
private Path pluginDataDir;
private Path gitPath;
private Path storagePath;
@@ -94,6 +104,7 @@
replicationConfig = plugin.getSysInjector().getInstance(ReplicationConfig.class);
storagePath = pluginDataDir.resolve("ref-updates");
tasksStorage = plugin.getSysInjector().getInstance(ReplicationTasksStorage.class);
+ destinationCollection = plugin.getSysInjector().getInstance(DestinationsCollection.class);
cleanupReplicationTasks();
tasksStorage.disableDeleteForTesting(true);
}
@@ -416,6 +427,45 @@
waitUntil(() -> isTaskCleanedUp());
}
+ @Test
+ public void shouldCleanupBothTasksAndLocksAfterReplicationCancelledAfterMaxRetries()
+ throws Exception {
+ String projectName = "task_cleanup_locks_project_cancelled";
+ String remoteDestination = "http://invalidurl:9090/";
+ URIish urish = new URIish(remoteDestination + projectName + ".git");
+ tasksStorage.disableDeleteForTesting(false);
+
+ setReplicationDestination(projectName, "replica", Optional.of(projectName));
+ // replace correct urls with invalid one to trigger retry
+ config.setString("remote", projectName, "url", remoteDestination + "${name}.git");
+ config.setInt("remote", projectName, "replicationMaxRetries", TEST_REPLICATION_MAX_RETRIES);
+ config.save();
+ reloadConfig();
+ Destination destination =
+ destinationCollection.getAll(FilterType.ALL).stream()
+ .filter(dest -> dest.getProjects().contains(projectName))
+ .findFirst()
+ .get();
+
+ waitUntil(() -> tasksStorage.listRunning().size() == 0);
+
+ createTestProject(projectName);
+
+ waitUntil(() -> isTaskRescheduled(destination.getQueueInfo(), urish));
+ // replicationRetry is set to 1 minute which is the minimum value. That's why
+ // should be safe to get the pushOne object from pending because it should be
+ // here for one minute
+ PushOne pushOp = destination.getQueueInfo().pending.get(urish);
+
+ WaitUtil.waitUntil(() -> pushOp.wasCanceled(), MAX_RETRY_WITH_TOLERANCE_TIMEOUT);
+ waitUntil(() -> isTaskCleanedUp());
+ }
+
+ private boolean isTaskRescheduled(QueueInfo queue, URIish uri) {
+ PushOne pushOne = queue.pending.get(uri);
+ return pushOne == null ? false : pushOne.isRetrying();
+ }
+
private Ref getRef(Repository repo, String branchName) throws IOException {
return repo.getRefDatabase().exactRef(branchName);
}