MidxPackFilter: Keep max one midx in the stack
We are keeping multiple disconnected midxs in the pack list (if
valid). The only reason for this disconnection is a race condition in
gc/compact and we don't need all of them in the pack list.
Take only the most recent valid midx for the pack list. Remove all other
uncovered midxs from the packlist.
Change-Id: If6697e9f92d9cdf5fc7bd28be3cc93596a6a6964
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
index 0752cd5..c5acb95 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
@@ -1243,9 +1243,7 @@ public void midx_oneMidx_deleteMidxs_allObjectsOneGC() throws Exception {
.add("file.txt", git.blob("a blob")).parent(root).create();
assertEquals(3, countPacks(INSERT));
- DfsPackDescription midx = DfsMidxWriter.writeMidx(
- NullProgressMonitor.INSTANCE, odb,
- Arrays.asList(odb.getPacks()), null);
+ DfsPackDescription midx = midx(Arrays.asList(odb.getPacks()), null);
odb.commitPack(List.of(midx), null);
gcNoTtl();
@@ -1279,13 +1277,11 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
List<DfsPackFile> basicPacks = Arrays.stream(odb.getPacks())
.collect(Collectors.toUnmodifiableList());
- DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb,
- basicPacks.subList(0, 9), null);
- odb.commitPack(List.of(midx), null);
+ DfsPackDescription baseMidx = midx(basicPacks.subList(0, 9), null);
+ odb.commitPack(List.of(baseMidx), null);
- DfsPackDescription midx2 = DfsMidxWriter.writeMidx(NULL_PM, odb,
- basicPacks.subList(9, 21), midx);
- odb.commitPack(List.of(midx2), null);
+ DfsPackDescription tipMidx = midx(basicPacks.subList(9, 21), baseMidx);
+ odb.commitPack(List.of(tipMidx), null);
// Verify we got one pack that is an midx
// This is testing the test code
@@ -1294,7 +1290,7 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
DfsPackDescription theDesc = odb.getPacks()[0].getPackDescription();
assertTrue(theDesc.hasFileExt(MULTI_PACK_INDEX));
assertEquals(12, theDesc.getCoveredPacks().size());
- assertEquals(theDesc.getMultiPackIndexBase(), midx);
+ assertEquals(theDesc.getMultiPackIndexBase(), baseMidx);
assertEquals(9,
theDesc.getMultiPackIndexBase().getCoveredPacks().size());
gcNoTtl();
@@ -1308,8 +1304,8 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
for (RevCommit c : knownCommits) {
assertTrue(isObjectInPack(c, pack));
}
- assertFalse(odb.listPacks().contains(midx));
- assertFalse(odb.listPacks().contains(midx2));
+ assertFalse(odb.listPacks().contains(baseMidx));
+ assertFalse(odb.listPacks().contains(tipMidx));
}
@Test
@@ -1323,8 +1319,7 @@ public void midx_packAndMidx_deleteMidxs_allObjectsOneGC()
assertEquals(3, countPacks(INSERT));
List<DfsPackFile> packs = Arrays.stream(odb.getPacks()).toList();
- DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb, packs,
- null);
+ DfsPackDescription midx = midx(packs, null);
odb.commitPack(List.of(midx), null);
RevBlob blobOutOfMidx = git.blob("some content");
@@ -1500,6 +1495,15 @@ private static DfsPackFile findFirstBySource(DfsPackFile[] packs, PackSource sou
.findFirst().get();
}
+ private DfsPackDescription midx(List<DfsPackFile> coveredPacks,
+ DfsPackDescription base) throws IOException {
+ DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb,
+ coveredPacks, base);
+ git.tick(1);
+ midx.setLastModified(git.getInstant().toEpochMilli());
+ return midx;
+ }
+
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
return git.commit();
}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/MidxPackFilterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/MidxPackFilterTest.java
index 584bab1..68aacba 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/MidxPackFilterTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/MidxPackFilterTest.java
@@ -130,6 +130,81 @@ public void useMidx_nestedMidxAndOnePack_topMidxAndPack() {
}
@Test
+ public void useMidx_unconnectedValidMidx_onlyTop() {
+
+ DfsPackDescription gc = pack("aaaa", GC, PACK);
+ DfsPackDescription compact = pack("cccc", COMPACT, PACK);
+ DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
+ firstMidx.setCoveredPacks(List.of(gc, compact));
+
+ DfsPackDescription unconnectedValid = pack("randoMidx", GC,
+ MULTI_PACK_INDEX);
+ unconnectedValid.setCoveredPacks(List.of(gc));
+
+ DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
+ DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
+ DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
+ topMidx.setCoveredPacks(List.of(compact2, compact3));
+ topMidx.setMultiPackIndexBase(firstMidx);
+
+ List<DfsPackDescription> reorgPacks = MidxPackFilter
+ .useMidx(List.of(gc, compact, firstMidx, unconnectedValid,
+ compact2, compact3, topMidx));
+ assertEquals(1, reorgPacks.size());
+ assertTrue(reorgPacks.contains(topMidx));
+ }
+
+ @Test
+ public void useMidx_unconnectedInvalidMidx_onlyTop() {
+ DfsPackDescription gc = pack("aaaa", GC, PACK);
+ DfsPackDescription compact = pack("cccc", COMPACT, PACK);
+ DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
+ firstMidx.setCoveredPacks(List.of(gc, compact));
+
+ DfsPackDescription notCommitted = pack("not-in-db", GC, PACK);
+ DfsPackDescription unconnectedInvalid = pack("randoMidx", GC,
+ MULTI_PACK_INDEX);
+ unconnectedInvalid.setCoveredPacks(List.of(notCommitted));
+
+ DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
+ DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
+ DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
+ topMidx.setCoveredPacks(List.of(compact2, compact3));
+ topMidx.setMultiPackIndexBase(firstMidx);
+
+ List<DfsPackDescription> reorgPacks = MidxPackFilter
+ .useMidx(List.of(gc, compact, firstMidx, unconnectedInvalid,
+ compact2, compact3, topMidx));
+ assertEquals(1, reorgPacks.size());
+ assertTrue(reorgPacks.contains(topMidx));
+ }
+
+ @Test
+ public void useMidx_latestMidxInvalid_takeNext() {
+ DfsPackDescription gc = pack("aaaa", GC, PACK);
+ DfsPackDescription compact = pack("cccc", COMPACT, PACK);
+ DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
+ firstMidx.setCoveredPacks(List.of(gc, compact));
+
+ DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
+ DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
+ DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
+ topMidx.setCoveredPacks(List.of(compact2, compact3));
+ topMidx.setMultiPackIndexBase(firstMidx);
+
+ DfsPackDescription notCommitted = pack("not-in-db", GC, PACK);
+ DfsPackDescription unconnectedInvalid = pack("randoMidx", GC,
+ MULTI_PACK_INDEX);
+ unconnectedInvalid.setCoveredPacks(List.of(notCommitted));
+
+ List<DfsPackDescription> reorgPacks = MidxPackFilter
+ .useMidx(List.of(gc, compact, firstMidx, unconnectedInvalid,
+ compact2, compact3, topMidx));
+ assertEquals(1, reorgPacks.size());
+ assertTrue(reorgPacks.contains(topMidx));
+ }
+
+ @Test
public void skipMidx_oneMidxCoversAll_allPacks() {
DfsPackDescription gc = pack("aaaa", GC, PACK);
DfsPackDescription compact = pack("cccc", COMPACT, PACK);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/MidxPackFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/MidxPackFilter.java
index 50de974..1183306 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/MidxPackFilter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/MidxPackFilter.java
@@ -9,13 +9,16 @@
*/
package org.eclipse.jgit.internal.storage.dfs;
-import org.eclipse.jgit.internal.storage.pack.PackExt;
-
+import java.util.ArrayList;
+import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
+import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
+import org.eclipse.jgit.internal.storage.pack.PackExt;
+
/**
* Format a flat list of packs and midxs into a valid list of packs.
* <p>
@@ -30,6 +33,23 @@ public class MidxPackFilter {
private MidxPackFilter() {
}
+ private static Comparator<DfsPackDescription> midxComparator = Comparator
+ .comparingLong(DfsPackDescription::getLastModified)
+ // If multiple midx where written at the same time, take the one
+ // with covering most objects first (it must be the tip)
+ .thenComparingLong(MidxPackFilter::getTotalCoveredObjects)
+ .reversed();
+
+ private static long getTotalCoveredObjects(DfsPackDescription desc) {
+ long objCount = 0;
+ DfsPackDescription current = desc;
+ while (current != null) {
+ objCount += current.getObjectCount();
+ current = current.getMultiPackIndexBase();
+ }
+ return objCount;
+ }
+
/**
* Reorganize the flat list of packs removing the midxs
*
@@ -59,40 +79,64 @@ public static List<DfsPackDescription> skipMidxs(
*/
public static List<DfsPackDescription> useMidx(
List<DfsPackDescription> packs) {
- // Take the packs covered by the midxs out of the list
List<DfsPackDescription> midxs = packs.stream()
.filter(desc -> desc.hasFileExt(PackExt.MULTI_PACK_INDEX))
- .toList();
+ .sorted(midxComparator).toList();
+ for (DfsPackDescription d : midxs) {
+ System.out.println(String.format(" %s - %d - %d", d.getPackName(),
+ d.getLastModified(), getTotalCoveredObjects(d)));
+ }
if (midxs.isEmpty()) {
return packs;
}
- Set<DfsPackDescription> inputPacks = new HashSet<>(packs);
- Set<DfsPackDescription> allCoveredPacks = new HashSet<>();
- for (DfsPackDescription midx : midxs) {
- Set<DfsPackDescription> coveredPacks = new HashSet<>();
- findCoveredPacks(midx, coveredPacks);
- if (!inputPacks.containsAll(coveredPacks)) {
- // This midx references packs not in the pack db.
- // It could be part of a chain, so we just ignore all midxs
- return skipMidxs(packs);
- }
- allCoveredPacks.addAll(coveredPacks);
+ Set<DfsPackDescription> packsSet = new HashSet<>(packs);
+ Optional<DfsPackDescription> bestMidx = midxs.stream()
+ .filter(midx -> isValid(midx, packsSet)).findFirst();
+ if (bestMidx.isEmpty()) {
+ return skipMidxs(packs);
}
- return packs.stream().filter(d -> !allCoveredPacks.contains(d))
- .collect(Collectors.toList());
+ // Take the packs covered by the midxs and other midxs themselves out of
+ // the list
+ Set<DfsPackDescription> coveredPacksAndMidxs = getAllCoveredPacks(
+ bestMidx.get());
+ return packs.stream().filter(p -> !coveredPacksAndMidxs.contains(p))
+ // At this point, any midx in the list besides bestMidx is a
+ // straggler.
+ .filter(p -> !p.hasFileExt(PackExt.MULTI_PACK_INDEX)
+ || p.equals(bestMidx.get()))
+ .collect(Collectors.toCollection(ArrayList::new));
}
- private static void findCoveredPacks(DfsPackDescription midx,
- Set<DfsPackDescription> covered) {
- if (!midx.getCoveredPacks().isEmpty()) {
- covered.addAll(midx.getCoveredPacks());
- }
+ private static boolean isValid(DfsPackDescription midx,
+ Set<DfsPackDescription> packs) {
+ DfsPackDescription tip = midx;
+ while (tip != null) {
+ if (!packs.containsAll(tip.getCoveredPacks())) {
+ return false;
+ }
- if (midx.getMultiPackIndexBase() != null) {
- findCoveredPacks(midx.getMultiPackIndexBase(), covered);
- covered.add(midx.getMultiPackIndexBase());
+ tip = tip.getMultiPackIndexBase();
}
+ return true;
+ }
+
+ private static Set<DfsPackDescription> getAllCoveredPacks(
+ DfsPackDescription midx) {
+ Set<DfsPackDescription> covered = new HashSet<>();
+ DfsPackDescription current = midx;
+ while (current != null) {
+ if (!current.getCoveredPacks().isEmpty()) {
+ covered.addAll(current.getCoveredPacks());
+ }
+
+ DfsPackDescription base = current.getMultiPackIndexBase();
+ if (base != null) {
+ covered.add(base);
+ }
+ current = base;
+ }
+ return covered;
}
}