Don't read git submodule commits during replication

Currently, any attempt to read/parse a git submodule commit throws an
`org.eclipse.jgit.errors.MissingObjectException`, because the commit
the submodule entry points to does not exist in this repo.

A git submodule commit (gitlink entry) can be identified by its
special `160000` file mode.

The solution is to exclude the git submodule's commit from being
read/parsed.

I believe everything is still replicated correctly in the end, but it
would be nice to get rid of this exception.

Bug: Issue 16897
Change-Id: Ib21b9e19c5282f94226d7a27c25c5fe0f94b7279
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReader.java b/src/main/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReader.java
index cd6a0ea..3f03ed6 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReader.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReader.java
@@ -38,6 +38,7 @@
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.errors.RepositoryNotFoundException;
 import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.Ref;
@@ -214,6 +215,24 @@
     return blobs;
   }
 
+  /**
+   * Reads and evaluates the git objects in this revision. The following are filtered out:
+   * <li>DELETE changes
+   * <li>git submodule commits, because the git commit hash is not present in this repo.
+   *
+   *     <p>The method keeps track of the total size of all objects it has processed, and verifies
+   *     it is below the acceptable threshold.
+   *
+   * @param projectName - the name of the project, used to check total object size threshold
+   * @param refName - the ref name, used to check total object size threshold
+   * @param git - this git repo, used to load the objects
+   * @param totalRefSize - tracks the total size of objects processed
+   * @param diffEntries - a list of the diff entries for this revision
+   * @return a list of {@code RevisionObjectData}, each holding the git object SHA, the git
+   *     object type and the object contents.
+   * @throws MissingObjectException - if the object can't be found
+   * @throws IOException - if processing failed for another reason
+   */
   private List<RevisionObjectData> readBlobs(
       Project.NameKey projectName,
       String refName,
@@ -223,7 +242,7 @@
       throws MissingObjectException, IOException {
     List<RevisionObjectData> blobs = Lists.newLinkedList();
     for (DiffEntry diffEntry : diffEntries) {
-      if (!ChangeType.DELETE.equals(diffEntry.getChangeType())) {
+      if (!(ChangeType.DELETE.equals(diffEntry.getChangeType()) || gitSubmoduleCommit(diffEntry))) {
         ObjectId diffObjectId = diffEntry.getNewId().toObjectId();
         ObjectLoader objectLoader = git.open(diffObjectId);
         totalRefSize += objectLoader.getSize();
@@ -237,6 +256,10 @@
     return blobs;
   }
 
+  private boolean gitSubmoduleCommit(DiffEntry diffEntry) {
+    return diffEntry.getNewMode().equals(FileMode.GITLINK);
+  }
+
   private RevTree getParentTree(Repository git, RevCommit commit)
       throws MissingObjectException, IOException {
     RevCommit parent = commit.getParent(0);
diff --git a/src/test/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReaderIT.java b/src/test/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReaderIT.java
index 1d8520f..fcd9b19 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReaderIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/replication/pull/RevisionReaderIT.java
@@ -20,6 +20,7 @@
 import com.google.common.collect.ImmutableMap;
 import com.google.common.truth.Truth8;
 import com.google.gerrit.acceptance.LightweightPluginDaemonTest;
+import com.google.gerrit.acceptance.PushOneCommit;
 import com.google.gerrit.acceptance.PushOneCommit.Result;
 import com.google.gerrit.acceptance.TestPlugin;
 import com.google.gerrit.acceptance.UseLocalDisk;
@@ -199,6 +200,39 @@
     Truth8.assertThat(revisionDataOption).isEmpty();
   }
 
+  @Test
+  public void shouldFilterOutGitSubmoduleCommitsWhenReadingTheBlobs() throws Exception {
+    String submodulePath = "submodule_path";
+    final ObjectId GitSubmoduleCommit =
+        ObjectId.fromString("93e2901bc0b4719ef6081ee6353b49c9cdd97614");
+
+    PushOneCommit push =
+        pushFactory
+            .create(admin.newIdent(), testRepo)
+            .addGitSubmodule(submodulePath, GitSubmoduleCommit);
+    PushOneCommit.Result pushResult = push.to("refs/for/master");
+    pushResult.assertOkStatus();
+    Change.Id changeId = pushResult.getChange().getId();
+    String refName = RefNames.patchSetRef(pushResult.getPatchSetId());
+
+    CommentInput comment = createCommentInput(1, 0, 1, 1, "Test comment");
+
+    ReviewInput reviewInput = new ReviewInput();
+    reviewInput.comments = ImmutableMap.of(Patch.COMMIT_MSG, ImmutableList.of(comment));
+    gApi.changes().id(changeId.get()).current().review(reviewInput);
+
+    Optional<RevisionData> revisionDataOption =
+        refObjectId(refName).flatMap(objId -> readRevisionFromObjectUnderTest(refName, objId, 0));
+
+    assertThat(revisionDataOption.isPresent()).isTrue();
+    RevisionData revisionData = revisionDataOption.get();
+
+    assertThat(revisionData.getBlobs()).hasSize(1);
+    RevisionObjectData blobObject = revisionData.getBlobs().get(0);
+    assertThat(blobObject.getType()).isEqualTo(Constants.OBJ_BLOB);
+    assertThat(blobObject.getSha1()).isNotEqualTo(GitSubmoduleCommit.getName());
+  }
+
   private CommentInput createCommentInput(
       int startLine, int startCharacter, int endLine, int endCharacter, String message) {
     CommentInput comment = new CommentInput();