UploadPack: Extract walk-based reachability check

Preparing the code to optimize the bitmap-based object reachability
checker.  We are mirroring first the commit reachability checker
structure (interface + 2 implementations).

Move the walk-base reachability checker to its own class.

This class is public at the moment. Later ObjectWalk will return an
interface and this implementation will be package-private.

Change-Id: Ifac70094e1af137291c3607d95e689992f814b26
Signed-off-by: Ivan Frade <ifrade@google.com>
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java
new file mode 100644
index 0000000..f8fc0c9
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityTest.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2020, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.revwalk;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+import org.eclipse.jgit.internal.storage.file.FileRepository;
+import org.eclipse.jgit.junit.LocalDiskRepositoryTestCase;
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.junit.TestRepository.CommitBuilder;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PedestrianObjectReachabilityTest
+		extends LocalDiskRepositoryTestCase {
+
+	PedestrianObjectReachabilityChecker getChecker(
+			TestRepository<FileRepository> repository)
+			throws Exception {
+		return new PedestrianObjectReachabilityChecker(
+				repository.getRevWalk().toObjectWalkWithSameObjects());
+	}
+
+	private TestRepository<FileRepository> repo;
+
+	private AddressableRevCommit baseCommit;
+
+	private AddressableRevCommit branchACommit;
+
+	private AddressableRevCommit branchBCommit;
+
+	private AddressableRevCommit mergeCommit;
+
+	/** {@inheritDoc} */
+	@Override
+	@Before
+	public void setUp() throws Exception {
+		super.setUp();
+		FileRepository db = createWorkRepository();
+		repo = new TestRepository<>(db);
+		prepareRepo();
+	}
+
+	@Test
+	public void blob_in_base_reachable_from_branches() throws Exception {
+		PedestrianObjectReachabilityChecker checker = getChecker(repo);
+
+		RevObject baseBlob = baseCommit.blob;
+		assertReachable("reachable from one branch", checker.areAllReachable(
+				Arrays.asList(baseBlob), Stream.of(branchACommit.commit)));
+		assertReachable("reachable from another branch",
+				checker.areAllReachable(
+				Arrays.asList(baseBlob), Stream.of(branchBCommit.commit)));
+	}
+
+	@Test
+	public void blob_reachable_from_owning_commit() throws Exception {
+		PedestrianObjectReachabilityChecker checker = getChecker(repo);
+
+		RevObject branchABlob = branchACommit.blob;
+		assertReachable("reachable from itself",
+				checker.areAllReachable(Arrays.asList(branchABlob),
+						Stream.of(branchACommit.commit)));
+	}
+
+	@Test
+	public void blob_in_branch_reachable_from_merge() throws Exception {
+		PedestrianObjectReachabilityChecker checker = getChecker(repo);
+
+		RevObject branchABlob = branchACommit.blob;
+		assertReachable("reachable from merge", checker.areAllReachable(
+				Arrays.asList(branchABlob), Stream.of(mergeCommit.commit)));
+	}
+
+	@Test
+	public void blob_unreachable_from_earlier_commit() throws Exception {
+		PedestrianObjectReachabilityChecker checker = getChecker(repo);
+
+		RevObject branchABlob = branchACommit.blob;
+		assertUnreachable("unreachable from earlier commit",
+				checker.areAllReachable(Arrays.asList(branchABlob),
+						Stream.of(baseCommit.commit)));
+	}
+
+	@Test
+	public void blob_unreachable_from_parallel_branch() throws Exception {
+		PedestrianObjectReachabilityChecker checker = getChecker(repo);
+
+		RevObject branchABlob = branchACommit.blob;
+		assertUnreachable("unreachable from another branch",
+				checker.areAllReachable(Arrays.asList(branchABlob),
+						Stream.of(branchBCommit.commit)));
+	}
+
+	private void prepareRepo()
+			throws Exception {
+		baseCommit = createCommit("base");
+		branchACommit = createCommit("branchA", baseCommit);
+		branchBCommit = createCommit("branchB", baseCommit);
+		mergeCommit = createCommit("merge", branchACommit, branchBCommit);
+
+		// Bitmaps are generated from references
+		// TODO(ifrade): These are not needed now, but will be when we use these
+		// tests with the bitmap implementation.
+		repo.update("refs/heads/a", branchACommit.commit);
+		repo.update("refs/heads/b", branchBCommit.commit);
+		repo.update("refs/heads/merge", mergeCommit.commit);
+	}
+
+	private AddressableRevCommit createCommit(String blobPath,
+			AddressableRevCommit... parents) throws Exception {
+		RevBlob blob = repo.blob(blobPath + " content");
+		CommitBuilder commitBuilder = repo.commit();
+		for (int i = 0; i < parents.length; i++) {
+			commitBuilder.parent(parents[i].commit);
+		}
+		commitBuilder.add(blobPath, blob);
+
+		RevCommit commit = commitBuilder.create();
+		return new AddressableRevCommit(commit, blob);
+	}
+
+	// Pair of commit and blob inside it
+	static class AddressableRevCommit {
+		RevCommit commit;
+
+		RevBlob blob;
+
+		AddressableRevCommit(RevCommit commit, RevBlob blob) {
+			this.commit = commit;
+			this.blob = blob;
+		}
+	}
+
+	private static void assertReachable(String msg,
+			Optional<RevObject> result) {
+		assertFalse(msg, result.isPresent());
+	}
+
+	private static void assertUnreachable(String msg,
+			Optional<RevObject> result) {
+		assertTrue(msg, result.isPresent());
+	}
+
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java
new file mode 100644
index 0000000..6e027ce
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/PedestrianObjectReachabilityChecker.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2020, Google LLC and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.revwalk;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.errors.MissingObjectException;
+import org.eclipse.jgit.lib.AnyObjectId;
+
+/**
+ * Checks if all objects are reachable from certain starting points doing a
+ * walk.
+ *
+ * This is an expensive check that browses commits, trees, blobs and tags. For
+ * reachability just between commits see {@link ReachabilityChecker}
+ * implementations.
+ *
+ * TODO(ifrade): This class won't be public when the interface is introduced.
+ * Skipping the @since.
+ */
+public class PedestrianObjectReachabilityChecker {
+	private ObjectWalk walk;
+
+	/**
+	 * New instance of the reachability checker using a existing walk.
+	 *
+	 * @param walk
+	 *            ObjectWalk instance to reuse. Caller retains ownership.
+	 */
+	public PedestrianObjectReachabilityChecker(ObjectWalk walk) {
+		this.walk = walk;
+	}
+
+	/**
+	 * Checks that all targets are reachable from the starters.
+	 *
+	 * @param targets
+	 *            objects we want to reach from the starters
+	 * @param starters
+	 *            objects known to be reachable to the caller
+	 * @return Optional with an unreachable target if there is any (there could
+	 *         be more than one). Empty optional means all targets are
+	 *         reachable.
+	 * @throws MissingObjectException
+	 *             An object was missing. This should not happen as the caller
+	 *             checked this while doing
+	 *             {@link RevWalk#parseAny(AnyObjectId)} to convert ObjectIds to
+	 *             RevObjects.
+	 * @throws IncorrectObjectTypeException
+	 *             Incorrect object type. As with missing objects, this should
+	 *             not happen if the caller used
+	 *             {@link RevWalk#parseAny(AnyObjectId)}.
+	 * @throws IOException
+	 *             Cannot access underlying storage
+	 */
+	public Optional<RevObject> areAllReachable(Collection<RevObject> targets,
+			Stream<RevObject> starters) throws MissingObjectException,
+			IncorrectObjectTypeException, IOException {
+		walk.reset();
+		walk.sort(RevSort.TOPO);
+		for (RevObject target : targets) {
+			walk.markStart(target);
+		}
+
+		Iterator<RevObject> iterator = starters.iterator();
+		while (iterator.hasNext()) {
+			RevObject o = iterator.next();
+			walk.markUninteresting(o);
+
+			RevObject peeled = walk.peel(o);
+			if (peeled instanceof RevCommit) {
+				// By default, for performance reasons, ObjectWalk does not mark
+				// a tree as uninteresting when we mark a commit. Mark it
+				// ourselves so that we can determine reachability exactly.
+				walk.markUninteresting(((RevCommit) peeled).getTree());
+			}
+		}
+
+		RevCommit commit = walk.next();
+		if (commit != null) {
+			return Optional.of(commit);
+		}
+
+		RevObject object = walk.nextObject();
+		if (object != null) {
+			return Optional.of(object);
+		}
+
+		return Optional.empty();
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java
index bb826d8..0552909 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java
@@ -80,12 +80,12 @@
 import org.eclipse.jgit.revwalk.BitmapWalker;
 import org.eclipse.jgit.revwalk.DepthWalk;
 import org.eclipse.jgit.revwalk.ObjectWalk;
+import org.eclipse.jgit.revwalk.PedestrianObjectReachabilityChecker;
 import org.eclipse.jgit.revwalk.ReachabilityChecker;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevFlag;
 import org.eclipse.jgit.revwalk.RevFlagSet;
 import org.eclipse.jgit.revwalk.RevObject;
-import org.eclipse.jgit.revwalk.RevSort;
 import org.eclipse.jgit.revwalk.RevTag;
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.revwalk.filter.CommitTimeRevFilter;
@@ -1910,36 +1910,6 @@
 		}
 	}
 
-	private static void checkReachabilityByWalkingObjects(ObjectWalk walk,
-			List<RevObject> wants, Set<ObjectId> reachableFrom) throws IOException {
-
-		walk.sort(RevSort.TOPO);
-		for (RevObject want : wants) {
-			walk.markStart(want);
-		}
-		for (ObjectId have : reachableFrom) {
-			RevObject o = walk.parseAny(have);
-			walk.markUninteresting(o);
-
-			RevObject peeled = walk.peel(o);
-			if (peeled instanceof RevCommit) {
-				// By default, for performance reasons, ObjectWalk does not mark a
-				// tree as uninteresting when we mark a commit. Mark it ourselves so
-				// that we can determine reachability exactly.
-				walk.markUninteresting(((RevCommit) peeled).getTree());
-			}
-		}
-
-		RevCommit commit = walk.next();
-		if (commit != null) {
-			throw new WantNotValidException(commit);
-		}
-		RevObject object = walk.nextObject();
-		if (object != null) {
-			throw new WantNotValidException(object);
-		}
-	}
-
 	private static void checkNotAdvertisedWants(UploadPack up,
 			List<ObjectId> notAdvertisedWants, Collection<Ref> visibleRefs)
 			throws IOException {
@@ -1967,8 +1937,17 @@
 						// operator is willing to pay the cost of these
 						// reachability checks.
 						try (ObjectWalk objWalk = walk.toObjectWalkWithSameObjects()) {
-							checkReachabilityByWalkingObjects(objWalk,
-									wantsAsObjs, reachableFrom);
+							List<RevObject> havesAsObjs = objectIdsToRevObjects(
+									objWalk, reachableFrom);
+							PedestrianObjectReachabilityChecker reachabilityChecker = new PedestrianObjectReachabilityChecker(
+									objWalk);
+							Optional<RevObject> unreachable = reachabilityChecker
+									.areAllReachable(wantsAsObjs,
+											havesAsObjs.stream());
+							if (unreachable.isPresent()) {
+								throw new WantNotValidException(
+										unreachable.get());
+							}
 						}
 						return;
 					}