Support related changes lists exceeding the user's query limit

Add a method to InternalQuery to exhaustively fetch a list of results
for multiple queries, and use this from InternalChangeQuery. For now, we
still respect the user's query limit because this always happens when we
set enforceVisibility=true. This may be revisited, but even so, the
queryExhaustively method is useful since the number of related changes
may also theoretically exceed the index's maxLimit.

As context, prior to Ibe271850, changes submitted with a rebase submit
strategy would be added to the relation chain of whatever change
happened to be at the tip of the branch, even if they weren't originally
related. This means even though users weren't creating chains of 500+
changes, they actually do exist in the wild due to this bug. Without
this change, GetRelated for those changes may fail with a 500 if the
input change didn't happen to show up in the first page of results in
the byProjectGroups query.

Change-Id: Ib9dad2454eed8110ce6db5e1bc840c59ebf82211
diff --git a/java/com/google/gerrit/index/query/InternalQuery.java b/java/com/google/gerrit/index/query/InternalQuery.java
index 0b5088d..ca5cc9b 100644
--- a/java/com/google/gerrit/index/query/InternalQuery.java
+++ b/java/com/google/gerrit/index/query/InternalQuery.java
@@ -17,6 +17,7 @@
 import static com.google.common.base.Preconditions.checkArgument;
 import static java.util.stream.Collectors.toSet;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.gerrit.index.FieldDef;
@@ -27,6 +28,7 @@
 import com.google.gwtorm.server.OrmException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.function.Supplier;
 
 /**
  * Execute a single query over a secondary index, for use by Gerrit internals.
@@ -58,6 +60,11 @@
     return (Q) this;
   }
 
+  final Q setStart(int start) {
+    queryProcessor.setStart(start);
+    return self();
+  }
+
   public final Q setLimit(int n) {
     queryProcessor.setUserProvidedLimit(n);
     return self();
@@ -82,8 +89,12 @@
   }
 
   public final List<T> query(Predicate<T> p) throws OrmException {
+    return queryResults(p).entities();
+  }
+
+  final QueryResult<T> queryResults(Predicate<T> p) throws OrmException {
     try {
-      return queryProcessor.query(p).entities();
+      return queryProcessor.query(p);
     } catch (QueryParseException e) {
       throw new OrmException(e);
     }
@@ -111,4 +122,48 @@
     Index<?, T> index = indexes != null ? indexes.getSearchIndex() : null;
     return index != null ? index.getSchema() : null;
   }
+
+  /**
+   * Query a predicate repeatedly until all results are exhausted.
+   *
+   * <p>Capable of iterating through all results regardless of limits. The passed {@code
+   * querySupplier} may choose to pre-set limits or not; this only affects the number of queries
+   * that may be issued, not the size of the final results.
+   *
+   * <p>Since multiple queries may be issued, this method is subject to races when the result set
+   * changes mid-iteration. This may result in skipped results, if an entity gets modified to jump
+   * to the front of the list after this method has passed it. It may also result in duplicate
+   * results, if an entity at the end of one batch of results gets pushed back further, putting it
+   * at the beginning of the next batch. This race cannot be avoided unless we change the underlying
+   * index interface to support true continuation tokens.
+   *
+   * @param querySupplier supplier for queries. Callers will generally pass a lambda that invokes an
+   *     underlying {@code Provider<InternalFooQuery>}, since the instances are not reusable. The
+   *     lambda may also call additional methods on the newly-created query, such as {@link
+   *     #enforceVisibility(boolean)}.
+   * @param predicate predicate to search for.
+   * @param <T> result type.
+   * @return exhaustive list of results, subject to the race condition described above.
+   * @throws OrmException if an error occurred.
+   */
+  protected static <T> ImmutableList<T> queryExhaustively(
+      Supplier<? extends InternalQuery<T, ?>> querySupplier, Predicate<T> predicate)
+      throws OrmException {
+    ImmutableList.Builder<T> b = null;
+    int start = 0;
+    while (true) {
+      QueryResult<T> qr = querySupplier.get().setStart(start).queryResults(predicate);
+      if (b == null) {
+        if (!qr.more()) {
+          return qr.entities();
+        }
+        b = ImmutableList.builder();
+      }
+      b.addAll(qr.entities());
+      if (!qr.more()) {
+        return b.build();
+      }
+      start += qr.entities().size();
+    }
+  }
 }
diff --git a/java/com/google/gerrit/server/query/change/InternalChangeQuery.java b/java/com/google/gerrit/server/query/change/InternalChangeQuery.java
index f53e663..973c451 100644
--- a/java/com/google/gerrit/server/query/change/InternalChangeQuery.java
+++ b/java/com/google/gerrit/server/query/change/InternalChangeQuery.java
@@ -44,6 +44,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.function.Supplier;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.Repository;
@@ -274,34 +275,42 @@
     return query(new SubmissionIdPredicate(cs));
   }
 
-  private List<ChangeData> byProjectGroups(Project.NameKey project, Collection<String> groups)
-      throws OrmException {
+  private static Predicate<ChangeData> byProjectGroupsPredicate(
+      IndexConfig indexConfig, Project.NameKey project, Collection<String> groups) {
     int n = indexConfig.maxTerms() - 1;
     checkArgument(groups.size() <= n, "cannot exceed %s groups", n);
     List<GroupPredicate> groupPredicates = new ArrayList<>(groups.size());
     for (String g : groups) {
       groupPredicates.add(new GroupPredicate(g));
     }
-    return query(and(project(project), or(groupPredicates)));
+    return and(project(project), or(groupPredicates));
   }
 
-  // Batching via multiple queries requires passing in a Provider since the underlying
-  // QueryProcessor instance is not reusable.
   public static List<ChangeData> byProjectGroups(
       Provider<InternalChangeQuery> queryProvider,
       IndexConfig indexConfig,
       Project.NameKey project,
       Collection<String> groups)
       throws OrmException {
+    // These queries may be complex along multiple dimensions:
+    //  * Many groups per change, if there are very many patch sets. This requires partitioning the
+    //    list of predicates and combining results.
+    //  * Many changes with the same set of groups, if the relation chain is very long. This
+    //    requires querying exhaustively with pagination.
+    // For both cases, we need to invoke the queryProvider multiple times, since each
+    // InternalChangeQuery is single-use.
+
+    Supplier<InternalChangeQuery> querySupplier = () -> queryProvider.get().enforceVisibility(true);
     int batchSize = indexConfig.maxTerms() - 1;
     if (groups.size() <= batchSize) {
-      return queryProvider.get().enforceVisibility(true).byProjectGroups(project, groups);
+      return queryExhaustively(
+          querySupplier, byProjectGroupsPredicate(indexConfig, project, groups));
     }
     Set<Change.Id> seen = new HashSet<>();
     List<ChangeData> result = new ArrayList<>();
     for (List<String> part : Iterables.partition(groups, batchSize)) {
       for (ChangeData cd :
-          queryProvider.get().enforceVisibility(true).byProjectGroups(project, part)) {
+          queryExhaustively(querySupplier, byProjectGroupsPredicate(indexConfig, project, part))) {
         if (!seen.add(cd.getId())) {
           result.add(cd);
         }
diff --git a/javatests/com/google/gerrit/acceptance/server/change/GetRelatedIT.java b/javatests/com/google/gerrit/acceptance/server/change/GetRelatedIT.java
index ac0a499..6c4c914 100644
--- a/javatests/com/google/gerrit/acceptance/server/change/GetRelatedIT.java
+++ b/javatests/com/google/gerrit/acceptance/server/change/GetRelatedIT.java
@@ -26,13 +26,20 @@
 import com.google.gerrit.acceptance.GerritConfig;
 import com.google.gerrit.acceptance.NoHttpd;
 import com.google.gerrit.acceptance.PushOneCommit;
+import com.google.gerrit.acceptance.testsuite.account.AccountOperations;
+import com.google.gerrit.acceptance.testsuite.group.GroupOperations;
 import com.google.gerrit.common.RawInputUtil;
+import com.google.gerrit.common.data.GlobalCapability;
+import com.google.gerrit.common.data.PermissionRule;
 import com.google.gerrit.extensions.api.changes.RelatedChangeAndCommitInfo;
 import com.google.gerrit.extensions.common.CommitInfo;
 import com.google.gerrit.extensions.common.EditInfo;
 import com.google.gerrit.index.IndexConfig;
+import com.google.gerrit.reviewdb.client.Account;
+import com.google.gerrit.reviewdb.client.AccountGroup;
 import com.google.gerrit.reviewdb.client.Change;
 import com.google.gerrit.reviewdb.client.PatchSet;
+import com.google.gerrit.server.project.testing.Util;
 import com.google.gerrit.server.query.change.ChangeData;
 import com.google.gerrit.server.restapi.change.ChangesCollection;
 import com.google.gerrit.server.restapi.change.GetRelated;
@@ -44,6 +51,9 @@
 import com.google.gerrit.testing.TestTimeUtil;
 import com.google.gwtorm.server.OrmException;
 import com.google.inject.Inject;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
 import org.eclipse.jgit.junit.TestRepository;
@@ -65,6 +75,9 @@
     return cfg;
   }
 
+  @Inject private AccountOperations accountOperations;
+  @Inject private GroupOperations groupOperations;
+
   private String systemTimeZone;
 
   @Before
@@ -574,6 +587,35 @@
     assertRelated(cd.change().currentPatchSetId());
   }
 
+  @Test
+  public void getRelatedManyChanges() throws Exception {
+    List<ObjectId> commitIds = new ArrayList<>();
+    for (int i = 1; i <= 5; i++) {
+      commitIds.add(commitBuilder().add(i + ".txt", "i").message("subject: " + i).create().copy());
+    }
+    pushHead(testRepo, "refs/for/master", false);
+
+    List<RelatedChangeAndCommitInfo> expected = new ArrayList<>(commitIds.size());
+    for (ObjectId commitId : commitIds) {
+      expected.add(changeAndCommit(getPatchSetId(commitId), commitId, 1));
+    }
+    Collections.reverse(expected);
+
+    PatchSet.Id lastPsId = getPatchSetId(Iterables.getLast(commitIds));
+    assertRelated(lastPsId, expected);
+
+    Account.Id accountId = accountOperations.newAccount().create();
+    AccountGroup.UUID groupUuid = groupOperations.newGroup().addMember(accountId).create();
+    try (ProjectConfigUpdate u = updateProject(allProjects)) {
+      PermissionRule rule = Util.allow(u.getConfig(), GlobalCapability.QUERY_LIMIT, groupUuid);
+      rule.setRange(0, 2);
+      u.save();
+    }
+    atrScope.set(atrScope.newContext(null, identifiedUserFactory.create(accountId)));
+
+    assertRelated(lastPsId, expected);
+  }
+
   private RevCommit parseBody(RevCommit c) throws Exception {
     testRepo.getRevWalk().parseBody(c);
     return c;
@@ -618,13 +660,18 @@
 
   private void assertRelated(PatchSet.Id psId, RelatedChangeAndCommitInfo... expected)
       throws Exception {
+    assertRelated(psId, Arrays.asList(expected));
+  }
+
+  private void assertRelated(PatchSet.Id psId, List<RelatedChangeAndCommitInfo> expected)
+      throws Exception {
     List<RelatedChangeAndCommitInfo> actual =
         gApi.changes().id(psId.getParentKey().get()).revision(psId.get()).related().changes;
-    assertThat(actual).named("related to " + psId).hasSize(expected.length);
+    assertThat(actual).named("related to " + psId).hasSize(expected.size());
     for (int i = 0; i < actual.size(); i++) {
       String name = "index " + i + " related to " + psId;
       RelatedChangeAndCommitInfo a = actual.get(i);
-      RelatedChangeAndCommitInfo e = expected[i];
+      RelatedChangeAndCommitInfo e = expected.get(i);
       assertThat(a.project).named("project of " + name).isEqualTo(e.project);
       assertThat(a._changeNumber).named("change ID of " + name).isEqualTo(e._changeNumber);
       // Don't bother checking changeId; assume _changeNumber is sufficient.