MultiPackIndexWriter: write "bitmapped packfiles" chunk

We are not writing the "bitmapped packfiles" chunk in the midx. This
chunk tells how many bits in the bitmap belong to each pack and helps
to speed up the lookup of offsets in the reverse index.

Update the pack index merger to keep track of how many objects are
selected per pack. Use that data to write the "bitmapped packfiles"
chunk in the midx writer.

Change-Id: I5cb48fc6cc180ccddbe14d20993191ea6a6a6964
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java
index 9b085a3..5971dceb 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriterTest.java
@@ -10,6 +10,7 @@
 package org.eclipse.jgit.internal.storage.midx;
 
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_BITMAPPEDPACKS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_LARGEOFFSETS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
@@ -54,21 +55,23 @@ public void write_allSmallOffsets() throws IOException {
 		ByteArrayOutputStream out = new ByteArrayOutputStream();
 		writer.write(NullProgressMonitor.INSTANCE, out, data);
 		// header (12 bytes)
-		// + chunkHeader (6 * 12 bytes)
+		// + chunkHeader (7 * 12 bytes)
 		// + fanout table (256 * 4 bytes)
 		// + OIDs (6 * 20 bytes)
 		// + (pack, offset) pairs (6 * 8)
 		// + RIDX (6 * 4 bytes)
+		// + bitmap pack segments (2 * 8)
 		// + packfile names (2 * 10)
 		// + checksum (20)
-		assertEquals(1340, out.size());
+		assertEquals(1368, out.size());
 		List<Integer> chunkIds = readChunkIds(out);
-		assertEquals(5, chunkIds.size());
+		assertEquals(6, chunkIds.size());
 		assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
 		assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
 		assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
 		assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
-		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_BITMAPPEDPACKS));
+		assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
 	}
 
 	@Test
@@ -90,21 +93,23 @@ public void write_smallOffset_limit() throws IOException {
 		ByteArrayOutputStream out = new ByteArrayOutputStream();
 		writer.write(NullProgressMonitor.INSTANCE, out, data);
 		// header (12 bytes)
-		// + chunkHeader (6 * 12 bytes)
+		// + chunkHeader (7 * 12 bytes)
 		// + fanout table (256 * 4 bytes)
 		// + OIDs (6 * 20 bytes)
 		// + (pack, offset) pairs (6 * 8)
 		// + RIDX (6 * 4 bytes)
+		// + bitmap pack segments (2 * 8)
 		// + packfile names (2 * 10)
 		// + checksum (20)
-		assertEquals(1340, out.size());
+		assertEquals(1368, out.size());
 		List<Integer> chunkIds = readChunkIds(out);
-		assertEquals(5, chunkIds.size());
+		assertEquals(6, chunkIds.size());
 		assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
 		assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
 		assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
 		assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
-		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_BITMAPPEDPACKS));
+		assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
 	}
 
 	@Test
@@ -126,23 +131,25 @@ public void write_largeOffset() throws IOException {
 		MultiPackIndexWriter.Result result = writer
 				.write(NullProgressMonitor.INSTANCE, out, data);
 		// header (12 bytes)
-		// + chunkHeader (7 * 12 bytes)
+		// + chunkHeader (8 * 12 bytes)
 		// + fanout table (256 * 4 bytes)
 		// + OIDs (6 * 20 bytes)
 		// + (pack, offset) pairs (6 * 8)
 		// + (large-offset) (1 * 8)
 		// + RIDX (6 * 4 bytes)
+		// + bitmap pack segments (2 * 8)
 		// + packfile names (2 * 10)
 		// + checksum (20)
-		assertEquals(1360, out.size());
+		assertEquals(1388, out.size());
 		List<Integer> chunkIds = readChunkIds(out);
-		assertEquals(6, chunkIds.size());
+		assertEquals(7, chunkIds.size());
 		assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
 		assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
 		assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
 		assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_LARGEOFFSETS));
 		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
-		assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+		assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_BITMAPPEDPACKS));
+		assertEquals(6, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
 
 		assertEquals("bbbbbbbbb", result.packNames().get(0));
 		assertEquals("aaaaaaaaa", result.packNames().get(1));
@@ -159,13 +166,14 @@ public void jgit_emptyMidx() throws IOException {
 		ByteArrayOutputStream out = new ByteArrayOutputStream();
 		writer.write(NullProgressMonitor.INSTANCE, out, packs);
 		List<Integer> chunkIds = readChunkIds(out);
-		assertEquals(1134, out.size());
-		assertEquals(5, chunkIds.size());
+		assertEquals(1162, out.size());
+		assertEquals(6, chunkIds.size());
 		assertEquals(0, chunkIds.indexOf(MIDX_CHUNKID_OIDFANOUT));
 		assertEquals(1, chunkIds.indexOf(MIDX_CHUNKID_OIDLOOKUP));
 		assertEquals(2, chunkIds.indexOf(MIDX_CHUNKID_OBJECTOFFSETS));
 		assertEquals(3, chunkIds.indexOf(MIDX_CHUNKID_REVINDEX));
-		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
+		assertEquals(4, chunkIds.indexOf(MIDX_CHUNKID_BITMAPPEDPACKS));
+		assertEquals(5, chunkIds.indexOf(MIDX_CHUNKID_PACKNAMES));
 	}
 
 	private List<Integer> readChunkIds(ByteArrayOutputStream out) {
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/PackIndexMergerTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/PackIndexMergerTest.java
index 0eafdaf..a43992d 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/PackIndexMergerTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/midx/PackIndexMergerTest.java
@@ -9,6 +9,7 @@
  */
 package org.eclipse.jgit.internal.storage.midx;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -254,6 +255,65 @@ public void bySha1Iterator_largeOffsets_noChunk() {
 		assertEquals(3, merger.getUniqueObjectCount());
 	}
 
+	@Test
+	public void getObjectsPerPack_noDuplicates() {
+		PackIndex idxOne = indexOf(
+				oidOffset("0000000000000000000000000000000000000001", 500),
+				oidOffset("0000000000000000000000000000000000000005", 12),
+				oidOffset("0000000000000000000000000000000000000010", 1500));
+		PackIndex idxTwo = indexOf(
+				oidOffset("0000000000000000000000000000000000000002", 501),
+				oidOffset("0000000000000000000000000000000000000003", 13),
+				oidOffset("0000000000000000000000000000000000000015", 1501));
+		PackIndex idxThree = indexOf(
+				oidOffset("0000000000000000000000000000000000000004", 502),
+				oidOffset("0000000000000000000000000000000000000007", 14),
+				oidOffset("0000000000000000000000000000000000000012", 1502));
+		PackIndexMerger merger = new PackIndexMerger(
+				orderedMapOf("p1", idxOne, "p2", idxTwo, "p3", idxThree));
+		assertArrayEquals(new int[] { 3, 3, 3 }, merger.getObjectsPerPack());
+	}
+
+	@Test
+	public void getObjectsPerPack_differentIndexSizes() {
+		PackIndex idxOne = indexOf(
+				oidOffset("0000000000000000000000000000000000000010", 1500));
+		PackIndex idxTwo = indexOf(
+				oidOffset("0000000000000000000000000000000000000002", 500),
+				oidOffset("0000000000000000000000000000000000000003", 12));
+		PackIndex idxThree = indexOf(
+				oidOffset("0000000000000000000000000000000000000004", 500),
+				oidOffset("0000000000000000000000000000000000000007", 12),
+				oidOffset("0000000000000000000000000000000000000012", 1500));
+		PackIndexMerger merger = new PackIndexMerger(
+				orderedMapOf("p1", idxOne, "p2", idxTwo, "p3", idxThree));
+		assertArrayEquals(new int[] { 1, 2, 3 }, merger.getObjectsPerPack());
+	}
+
+	@Test
+	public void getObjectsPerPack_allDuplicates() {
+		PackIndex idxOne = indexOf(
+				oidOffset("0000000000000000000000000000000000000001", 500),
+				oidOffset("0000000000000000000000000000000000000005", 12),
+				oidOffset("0000000000000000000000000000000000000010", 1500));
+		PackIndexMerger merger = new PackIndexMerger(
+				orderedMapOf("p1", idxOne, "p2", idxOne, "p3", idxOne));
+		assertArrayEquals(new int[] { 3, 0, 0 }, merger.getObjectsPerPack());
+	}
+
+	@Test
+	public void getObjectsPerPack_noIndexes() {
+		PackIndexMerger merger = new PackIndexMerger(new LinkedHashMap<>());
+		assertArrayEquals(new int[] {}, merger.getObjectsPerPack());
+	}
+
+	@Test
+	public void getObjectsPerPack_emptyIndexes() {
+		PackIndexMerger merger = new PackIndexMerger(
+				orderedMapOf("p1", indexOf(), "p2", indexOf()));
+		assertArrayEquals(new int[] { 0, 0 }, merger.getObjectsPerPack());
+	}
+
 	private static void assertNextEntry(
 			Iterator<PackIndexMerger.MidxMutableEntry> it, String oid,
 			int packId, long offset) {
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java
index 993e7df..4d2e580 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/MultiPackIndexWriter.java
@@ -10,6 +10,7 @@
 package org.eclipse.jgit.internal.storage.midx;
 
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.CHUNK_LOOKUP_WIDTH;
+import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_BITMAPPEDPACKS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_LARGEOFFSETS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OBJECTOFFSETS;
 import static org.eclipse.jgit.internal.storage.midx.MultiPackIndexConstants.MIDX_CHUNKID_OIDFANOUT;
@@ -137,6 +138,8 @@ private List<ChunkHeader> createChunkHeaders(PackIndexMerger data) {
 		}
 		chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_REVINDEX,
 				4L * data.getUniqueObjectCount(), this::writeRidx));
+		chunkHeaders.add(new ChunkHeader(MIDX_CHUNKID_BITMAPPEDPACKS,
+				8L * data.getPackCount(), this::writeBitmappedPackfiles));
 
 		int packNamesSize = data.getPackNames().stream()
 				.mapToInt(String::length).map(i -> i + 1 /* null at the end */)
@@ -308,13 +311,12 @@ private void writeRidx(WriteContext ctx) throws IOException {
 			MidxMutableEntry e = iterator.next();
 			OffsetPosition op = new OffsetPosition(e.getOffset(), midxPosition);
 			midxPosition++;
-			packOffsets.computeIfAbsent(Integer.valueOf(e.getPackId()),
-					k -> new ArrayList<>()).add(op);
+			packOffsets.computeIfAbsent(e.getPackId(), k -> new ArrayList<>())
+					.add(op);
 		}
 
 		for (int i = 0; i < ctx.data.getPackCount(); i++) {
-			List<OffsetPosition> offsetsForPack = packOffsets
-					.get(Integer.valueOf(i));
+			List<OffsetPosition> offsetsForPack = packOffsets.get(i);
 			if (offsetsForPack == null) {
 				continue;
 			}
@@ -328,6 +330,21 @@ private void writeRidx(WriteContext ctx) throws IOException {
 		}
 	}
 
+	private void writeBitmappedPackfiles(WriteContext ctx) throws IOException {
+		int[] objsPerPack = ctx.data.getObjectsPerPack();
+
+		byte[] buffer = new byte[8 * objsPerPack.length];
+		int bufferPos = 0;
+		int accruedBitmapPositions = 0;
+		for (int pack = 0; pack < objsPerPack.length; pack++) {
+			NB.encodeInt32(buffer, bufferPos, accruedBitmapPositions);
+			NB.encodeInt32(buffer, bufferPos + 4, objsPerPack[pack]);
+			accruedBitmapPositions += objsPerPack[pack];
+			bufferPos += 8;
+		}
+		ctx.out.write(buffer);
+	}
+
 	/**
 	 * Write the large offset chunk
 	 * <p>
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/PackIndexMerger.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/PackIndexMerger.java
index fd040b9..4a296ee 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/PackIndexMerger.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/midx/PackIndexMerger.java
@@ -92,6 +92,8 @@ private void fill(int packId, PackIndex.MutableEntry other) {
 
 	private final int uniqueObjectCount;
 
+	private final int[] objectsPerPack;
+
 	/**
 	 * Build a common view of these pack indexes
 	 * <p>
@@ -104,6 +106,7 @@ private void fill(int packId, PackIndex.MutableEntry other) {
 		this.packNames = packs.keySet().stream().toList();
 		this.indexes = packs.values().stream().toList();
 
+		objectsPerPack = new int[packNames.size()];
 		// Iterate for duplicates
 		int objectCount = 0;
 		boolean hasLargeOffsets = false;
@@ -127,6 +130,7 @@ private void fill(int packId, PackIndex.MutableEntry other) {
 
 			lastSeen.fromObjectId(entry.oid);
 			objectCount++;
+			objectsPerPack[entry.packId]++;
 		}
 		uniqueObjectCount = objectCount;
 		offsetsOver31BitsCount = over31bits;
@@ -164,6 +168,16 @@ int getOffsetsOver31BitsCount() {
 	}
 
 	/**
+	 * Number of objects selected for the midx per packid
+	 *
+	 * @return array where position n contains the amount of objects selected
+	 *         for pack id n
+	 */
+	int[] getObjectsPerPack() {
+		return objectsPerPack;
+	}
+
+	/**
 	 * List of pack names in alphabetical order.
 	 * <p>
 	 * Order matters: In case of duplicates, the multipack index prefers the