Retry rename replication upon failure on replica

This change adds a configuration entry to setup the number of retries
default is three retries.

Change-Id: I47c011dc66f5ccbb614422a1c52fc3719e8dfccb
diff --git a/src/main/java/com/googlesource/gerrit/plugins/renameproject/Configuration.java b/src/main/java/com/googlesource/gerrit/plugins/renameproject/Configuration.java
index a74c4cb..c25152e 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/renameproject/Configuration.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/renameproject/Configuration.java
@@ -34,6 +34,7 @@
   private final int sshCommandTimeout;
   private final int sshConnectionTimeout;
   private final String renameRegex;
+  private final int renameReplicationRetries;
 
   private final Set<String> urls;
 
@@ -44,6 +45,7 @@
     sshCommandTimeout = cfg.getInt("sshCommandTimeout", 0);
     sshConnectionTimeout = cfg.getInt("sshConnectionTimeout", DEFAULT_SSH_CONNECTION_TIMEOUT_MS);
     renameRegex = cfg.getString("renameRegex", ".+");
+    renameReplicationRetries = cfg.getInt("renameReplicationRetries", 3);
 
     urls =
         Arrays.stream(cfg.getStringList(URL_KEY))
@@ -72,4 +74,8 @@
   public String getRenameRegex() {
     return renameRegex;
   }
+
+  public int getRenameReplicationRetries() {
+    return renameReplicationRetries;
+  }
 }
diff --git a/src/main/java/com/googlesource/gerrit/plugins/renameproject/RenameProject.java b/src/main/java/com/googlesource/gerrit/plugins/renameproject/RenameProject.java
index 0cf1ca4..d56d3f3 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/renameproject/RenameProject.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/renameproject/RenameProject.java
@@ -53,8 +53,10 @@
 import java.io.OutputStream;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 import org.eclipse.jgit.errors.ConfigInvalidException;
 import org.eclipse.jgit.transport.URIish;
 import org.slf4j.Logger;
@@ -239,7 +241,7 @@
       pluginEvent.fire(pluginName, pluginName, oldProjectKey.get() + ":" + newProjectKey.get());
 
       // replicate rename-project operation to other replica instances
-      replicateRename(sshHelper, input, oldProjectKey);
+      replicateRename(sshHelper, input, oldProjectKey, pm);
     } catch (Exception e) {
       if (stepsPerformed.isEmpty()) {
         log.error("Renaming procedure failed. Exception caught: {}", e.toString());
@@ -338,8 +340,37 @@
     return dbHandler.getChangeIds(oldProjectKey);
   }
 
-  void replicateRename(SshHelper sshHelper, Input input, Project.NameKey oldProjectKey) {
-    for (String url : cfg.getUrls()) {
+  void replicateRename(
+      SshHelper sshHelper,
+      Input input,
+      Project.NameKey oldProjectKey,
+      Optional<ProgressMonitor> opm) {
+    opm.ifPresent(
+        pm ->
+            pm.beginTask(
+                String.format(
+                    "Replicating the rename of %s to %s", oldProjectKey.get(), input.name)));
+
+    Set<String> urls = cfg.getUrls();
+    int nbRetries = cfg.getRenameReplicationRetries();
+
+    for (int i = 0; i < nbRetries && urls.size() > 0; ++i) {
+      urls = tryRenameReplication(urls, sshHelper, input, oldProjectKey);
+    }
+    for (String url : urls) {
+      log.error(
+          "Failed to replicate the renaming of {} to {} on {} during {} attempts",
+          oldProjectKey.get(),
+          input.name,
+          url,
+          cfg.getUrls());
+    }
+  }
+
+  private Set<String> tryRenameReplication(
+      Set<String> replicas, SshHelper sshHelper, Input input, Project.NameKey oldProjectKey) {
+    Set<String> failedReplicas = new HashSet<>();
+    for (String url : replicas) {
       try {
         OutputStream errStream = sshHelper.newErrorBufferStream();
         sshHelper.executeRemoteSsh(
@@ -351,8 +382,13 @@
           throw new RenameReplicationException(errorMessage);
         }
       } catch (IOException | URISyntaxException | RenameReplicationException e) {
-        log.error("Failed to replicate rename to {}: {}", url, e.getMessage());
+        log.info(
+            "Rescheduling a rename replication for retry for {} on project {}",
+            url,
+            oldProjectKey.get());
+        failedReplicas.add(url);
       }
     }
+    return failedReplicas;
   }
 }
diff --git a/src/main/java/com/googlesource/gerrit/plugins/renameproject/SshHelper.java b/src/main/java/com/googlesource/gerrit/plugins/renameproject/SshHelper.java
index ae3ec9f..be7e5eb 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/renameproject/SshHelper.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/renameproject/SshHelper.java
@@ -14,6 +14,7 @@
 
 package com.googlesource.gerrit.plugins.renameproject;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.inject.Inject;
 import com.google.inject.Provider;
 import java.io.IOException;
@@ -87,7 +88,8 @@
     };
   }
 
-  protected RemoteSession connect(URIish uri) throws TransportException {
+  @VisibleForTesting
+  public RemoteSession connect(URIish uri) throws TransportException {
     return sshSessionFactoryProvider.get().getSession(uri, null, FS.DETECTED, connectionTimeout);
   }
 }
diff --git a/src/main/resources/Documentation/about.md b/src/main/resources/Documentation/about.md
index e0bd0b6..80ef373 100644
--- a/src/main/resources/Documentation/about.md
+++ b/src/main/resources/Documentation/about.md
@@ -42,9 +42,11 @@
 command to replicas' rename-project plugin using the hostname provided in the configuration file.
 Replicas then perform their own local file system rename.
 
-The caveat is if ssh rename replication fails, the plugin doesn't retry the rename replication
-operation. This results in primary and replica instances being out of sync. The admin then will have
-to consider the following steps:
+The ssh rename replication will retry as many times as provided by the configuration parameter
+`renameReplicationRetries`. The default value is 3 and can be configured in `gerrit.config`.
+If ssh rename replication fails after the mentioned number of retries, the plugin stops retrying
+the rename replication operation and logs the error. This results in primary and replica instances
+being out of sync. The admin then will have to consider the following steps:
 
 1. Start replication on the renamed project from the primary side. For more information, see the
 [replication start command](../../replication/Documentation/cmd-start.md).
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index 80872a1..e8ee6ae 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -44,6 +44,12 @@
 * `sshConnectionTimeout` : Timeout for SSH connections in minutes. If 0, there is no timeout, and
 the client waits indefinitely. By default, 2 minutes.
 
+Provides a configuration to customize the number of rename replication retries. By default, 3.
+
+```
+  renameReplicationRetries = 6
+```
+
 Also, this plugin offers a way to restrict the new names of the projects to match an optionally
 configured regex. For example:
 
diff --git a/src/test/java/com/googlesource/gerrit/plugins/renameproject/RenameIT.java b/src/test/java/com/googlesource/gerrit/plugins/renameproject/RenameIT.java
index c436745..f022707 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/renameproject/RenameIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/renameproject/RenameIT.java
@@ -16,8 +16,10 @@
 
 import static com.google.common.truth.Truth.assertThat;
 import static org.mockito.ArgumentMatchers.eq;
-import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.atMostOnce;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
@@ -42,6 +44,7 @@
 import java.util.Optional;
 import javax.inject.Named;
 import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.transport.RemoteSession;
 import org.eclipse.jgit.transport.URIish;
 import org.junit.Test;
 
@@ -212,7 +215,45 @@
   @Test
   @UseLocalDisk
   @GerritConfig(name = "plugin.rename-project.url", value = URL)
-  public void testReplicateRename() throws Exception {
+  public void testReplicateRenameSucceedsThenEnds() throws Exception {
+    RenameProject renameProject = plugin.getSysInjector().getInstance(RenameProject.class);
+    SshHelper sshHelper = mock(SshHelper.class);
+    OutputStream errStream = mock(OutputStream.class);
+    Input input = new Input();
+    input.name = NEW_PROJECT_NAME;
+    String expectedCommand =
+        PLUGIN_NAME + " " + project.get() + " " + NEW_PROJECT_NAME + " " + REPLICATION_OPTION;
+
+    when(sshHelper.newErrorBufferStream()).thenReturn(errStream);
+    when(errStream.toString()).thenReturn("");
+
+    renameProject.replicateRename(sshHelper, input, project, Optional.empty());
+    verify(sshHelper, atMostOnce())
+        .executeRemoteSsh(eq(new URIish(URL)), eq(expectedCommand), eq(errStream));
+  }
+
+  @Test
+  @UseLocalDisk
+  @GerritConfig(name = "plugin.rename-project.url", value = URL)
+  public void testReplicateRenameFailsOnReplicaThenRetries() throws Exception {
+    RenameProject renameProject = plugin.getSysInjector().getInstance(RenameProject.class);
+    RemoteSession session = mock(RemoteSession.class);
+    SshHelper sshHelper = mock(SshHelper.class);
+    OutputStream errStream = mock(OutputStream.class);
+    when(sshHelper.newErrorBufferStream()).thenReturn(errStream);
+    URIish urish = new URIish(URL);
+    Input input = new Input();
+    input.name = NEW_PROJECT_NAME;
+    when(sshHelper.connect(eq(urish))).thenReturn(session);
+    renameProject.replicateRename(sshHelper, input, project, Optional.empty());
+    String expectedCommand =
+        PLUGIN_NAME + " " + project.get() + " " + NEW_PROJECT_NAME + " " + REPLICATION_OPTION;
+    verify(sshHelper, times(3)).executeRemoteSsh(eq(urish), eq(expectedCommand), eq(errStream));
+  }
+
+  @Test
+  @UseLocalDisk
+  public void testReplicateRenameNeverCalled() throws Exception {
     RenameProject renameProject = plugin.getSysInjector().getInstance(RenameProject.class);
     SshHelper sshHelper = mock(SshHelper.class);
     OutputStream errStream = mock(OutputStream.class);
@@ -225,8 +266,8 @@
     when(sshHelper.newErrorBufferStream()).thenReturn(errStream);
     when(errStream.toString()).thenReturn("");
 
-    renameProject.replicateRename(sshHelper, input, project);
-    verify(sshHelper, atLeastOnce())
+    renameProject.replicateRename(sshHelper, input, project, Optional.empty());
+    verify(sshHelper, never())
         .executeRemoteSsh(eq(new URIish(URL)), eq(expectedCommand), eq(errStream));
   }