DfsGarbageCollector: handle pack lists with multipack indexes

Now the object db can have midx packs. In that case, we expect gc to
repack all regular packs into the usual GC/GC_REST/U_G and delete any
existing midx (effectively "reverting" the pack list to a no-midx state).

Besides any regular packs, take also packs inside the midx chain for the
compaction. Keep a list of the midx packs to delete them at the end.

Note that the caller MUST set the useMultiPackIndex to true in the
object database for gc to see the midx.

Change-Id: Ib50cc06bb9b8dbbc6d1004cfc1ec17cf54328be0
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
index 80bd689..b340636 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
@@ -4,6 +4,7 @@
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC_REST;
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.MULTI_PACK_INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
 import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
@@ -18,9 +19,12 @@
 import java.io.IOException;
 import java.time.Instant;
 import java.time.ZoneOffset;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 
 import org.eclipse.jgit.internal.storage.commitgraph.CommitGraph;
 import org.eclipse.jgit.internal.storage.commitgraph.CommitGraphWriter;
@@ -42,6 +46,7 @@
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectIdRef;
 import org.eclipse.jgit.lib.PersonIdent;
+import org.eclipse.jgit.lib.ProgressMonitor;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.revwalk.RevBlob;
@@ -62,12 +67,15 @@ public class DfsGarbageCollectorTest {
 	private DfsObjDatabase odb;
 	private MockSystemReader mockSystemReader;
 
+	private static final ProgressMonitor NULL_PM = NullProgressMonitor.INSTANCE;
+
 	@Before
 	public void setUp() throws IOException {
 		DfsRepositoryDescription desc = new DfsRepositoryDescription("test");
 		git = new TestRepository<>(new InMemoryRepository(desc));
 		repo = git.getRepository();
 		odb = repo.getObjectDatabase();
+		odb.setUseMultipackIndex(true);
 		mockSystemReader = new MockSystemReader();
 		SystemReader.setInstance(mockSystemReader);
 	}
@@ -1227,6 +1235,117 @@ public void objectSizeIndex_unreachableGarbage_noIdx() throws Exception {
 	}
 
 	@Test
+	public void midx_oneMidx_deleteMidxs_allObjectsOneGC() throws Exception {
+		String master = "refs/heads/master";
+		RevCommit root = git.branch(master).commit().message("root").noParents()
+				.create();
+		git.branch(master).commit().message("commit on head")
+				.add("file.txt", git.blob("a blob")).parent(root).create();
+		assertEquals(3, countPacks(INSERT));
+
+		DfsPackDescription midx = DfsMidxWriter.writeMidx(
+				NullProgressMonitor.INSTANCE, odb,
+				Arrays.asList(odb.getPacks()), null);
+		odb.commitPack(List.of(midx), null);
+
+		gcNoTtl();
+
+		// Only one pack, is GC but not multipack index
+		assertEquals(1, odb.getPacks().length);
+		DfsPackDescription actualDesc = odb.getPacks()[0].getPackDescription();
+		assertEquals(GC, actualDesc.getPackSource());
+		assertFalse(actualDesc.hasFileExt(MULTI_PACK_INDEX));
+		DfsPackFile pack = odb.getPacks()[0];
+		assertFalse(pack instanceof DfsPackFileMidx);
+		assertTrue(isObjectInPack(root, pack));
+	}
+
+	@Test
+	public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
+		String master = "refs/heads/master";
+		List<RevCommit> knownCommits = new ArrayList<>(11);
+		RevCommit root = git.branch(master).commit().message("root").noParents()
+				.create();
+		knownCommits.add(root);
+		RevCommit tip = root;
+		for (int i = 0; i < 10; i++) {
+			tip = git.branch(master).commit().message("commit on head")
+					.add("file.txt", git.blob("a blob " + i)).parent(tip)
+					.create();
+			knownCommits.add(tip);
+			// Each of these creates two packs
+		}
+		assertEquals(21, countPacks(INSERT));
+
+		List<DfsPackFile> basicPacks = Arrays.stream(odb.getPacks())
+				.collect(Collectors.toUnmodifiableList());
+		DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb,
+				basicPacks.subList(0, 9), null);
+		odb.commitPack(List.of(midx), null);
+
+		DfsPackDescription midx2 = DfsMidxWriter.writeMidx(NULL_PM, odb,
+				basicPacks.subList(9, 21), midx);
+		odb.commitPack(List.of(midx2), null);
+
+		// Verify we got one pack that is an midx
+		// This is testing the test code
+		assertEquals(1, odb.getPacks().length);
+		assertTrue(odb.getPacks()[0] instanceof DfsPackFileMidx);
+		DfsPackDescription theDesc = odb.getPacks()[0].getPackDescription();
+		assertTrue(theDesc.hasFileExt(MULTI_PACK_INDEX));
+		assertEquals(12, theDesc.getCoveredPacks().size());
+		assertEquals(theDesc.getMultiPackIndexBase(), midx);
+		assertEquals(9,
+				theDesc.getMultiPackIndexBase().getCoveredPacks().size());
+		gcNoTtl();
+
+		// One pack, GC WITHOUT multipack index, contains ALL objects
+		assertEquals(1, odb.getPacks().length);
+		DfsPackFile pack = odb.getPacks()[0];
+		assertEquals(GC, pack.getPackDescription().getPackSource());
+		assertFalse(pack instanceof DfsPackFileMidx);
+		assertFalse(pack.getPackDescription().hasFileExt(MULTI_PACK_INDEX));
+		for (RevCommit c : knownCommits) {
+			assertTrue(isObjectInPack(c, pack));
+		}
+	}
+
+	@Test
+	public void midx_packAndMidx_deleteMidxs_allObjectsOneGC()
+			throws Exception {
+		String master = "refs/heads/master";
+		RevCommit root = git.branch(master).commit().message("root").noParents()
+				.create();
+		RevCommit tip = git.branch(master).commit().message("commit on head")
+				.add("file.txt", git.blob("a blob")).parent(root).create();
+		assertEquals(3, countPacks(INSERT));
+
+		List<DfsPackFile> packs = Arrays.stream(odb.getPacks()).toList();
+		DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb, packs,
+				null);
+		odb.commitPack(List.of(midx), null);
+
+		RevBlob blobOutOfMidx = git.blob("some content");
+		RevCommit commitOutOfMidx = git.branch(master).commit()
+				.message("an extra commit").add("other.txt", blobOutOfMidx)
+				.parent(tip).create();
+		assertEquals(3, odb.getPacks().length); // midx + 2 new packs
+		gcNoTtl();
+
+		// Only one pack, is GC but not multipack index
+		assertEquals(1, odb.getPacks().length);
+		DfsPackDescription actualDesc = odb.getPacks()[0].getPackDescription();
+		assertEquals(GC, actualDesc.getPackSource());
+		assertFalse(actualDesc.hasFileExt(MULTI_PACK_INDEX));
+
+		DfsPackFile pack = odb.getPacks()[0];
+		assertTrue(isObjectInPack(root, pack));
+		assertTrue(isObjectInPack(root, pack));
+		assertTrue(isObjectInPack(blobOutOfMidx, pack));
+		assertTrue(isObjectInPack(commitOutOfMidx, pack));
+	}
+
+	@Test
 	public void bitmapIndexWrittenDuringGc() throws Exception {
 		int numBranches = 2;
 		int commitsPerBranch = 50;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
index 199481c..78d65b5 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java
@@ -18,6 +18,7 @@
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
 import static org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor.configureReftable;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.COMMIT_GRAPH;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.MULTI_PACK_INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.OBJECT_SIZE_INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
@@ -25,6 +26,7 @@
 
 import java.io.IOException;
 import java.time.Instant;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
@@ -33,6 +35,7 @@
 import java.util.GregorianCalendar;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
@@ -95,6 +98,8 @@ public class DfsGarbageCollector {
 	private List<DfsReftable> reftablesBefore;
 	private List<DfsPackFile> expiredGarbagePacks;
 
+	private List<DfsPackFileMidx> existingMidxs;
+
 	private Collection<Ref> refsBefore;
 	private Set<ObjectId> allHeadsAndTags;
 	private Set<ObjectId> allTags;
@@ -431,9 +436,11 @@ private Collection<Ref> getAllRefs() throws IOException {
 	}
 
 	private void readPacksBefore() throws IOException {
-		DfsPackFile[] packs = objdb.getPacks();
-		packsBefore = new ArrayList<>(packs.length);
-		expiredGarbagePacks = new ArrayList<>(packs.length);
+		DfsPackFile[] rawPacks = objdb.getPacks();
+		List<DfsPackFile> packs = getPlainPacks(rawPacks);
+		existingMidxs = getMidxPacks(rawPacks);
+		packsBefore = new ArrayList<>(packs.size());
+		expiredGarbagePacks = new ArrayList<>(packs.size());
 
 		long now = SystemReader.getInstance().now().toEpochMilli();
 		for (DfsPackFile p : packs) {
@@ -448,6 +455,41 @@ private void readPacksBefore() throws IOException {
 		}
 	}
 
+	private static List<DfsPackFile> getPlainPacks(DfsPackFile[] packs) {
+		List<DfsPackFile> plainPacks = new ArrayList<>();
+		Queue<DfsPackFile> pending = new ArrayDeque<>(
+				Arrays.stream(packs).toList());
+		while (!pending.isEmpty()) {
+			DfsPackFile pack = pending.poll();
+			if (pack instanceof DfsPackFileMidx midxPack) {
+				plainPacks.addAll(midxPack.getCoveredPacks());
+				if (midxPack.getMultipackIndexBase() != null) {
+					pending.add(midxPack.getMultipackIndexBase());
+				}
+			} else {
+				plainPacks.add(pack);
+			}
+		}
+		return plainPacks;
+	}
+
+	private static List<DfsPackFileMidx> getMidxPacks(DfsPackFile[] packs) {
+		List<DfsPackFileMidx> topLevelMidxs = Arrays.stream(packs).filter(
+				p -> p.getPackDescription().hasFileExt(MULTI_PACK_INDEX))
+				.map(p -> (DfsPackFileMidx) p).toList();
+
+		List<DfsPackFileMidx> midxPacks = new ArrayList<>();
+		Queue<DfsPackFileMidx> pending = new ArrayDeque<>(topLevelMidxs);
+		while (!pending.isEmpty()) {
+			DfsPackFileMidx midx = pending.poll();
+			midxPacks.add(midx);
+			if (midx.getMultipackIndexBase() != null) {
+				pending.add(midx.getMultipackIndexBase());
+			}
+		}
+		return midxPacks;
+	}
+
 	private void readReftablesBefore() throws IOException {
 		DfsReftable[] tables = objdb.getReftables();
 		reftablesBefore = new ArrayList<>(Arrays.asList(tables));
@@ -568,6 +610,11 @@ private Set<DfsPackDescription> toPrune() {
 		for (DfsPackFile pack : expiredGarbagePacks) {
 			toPrune.add(pack.getPackDescription());
 		}
+
+		for (DfsPackFileMidx pack : existingMidxs) {
+			toPrune.add(pack.getPackDescription());
+		}
+
 		return toPrune;
 	}