PackBitmapIndex: Reduce memory usage in GC

Currently, the garbage collection is consistently failing for some large
repositories in the building bitmap phase, e.g.Linux-MSM project:
https://source.codeaurora.org/quic/la/kernel/msm-3.18

Historically, bitmap index creation happened in 3 phases:
1. Select the commits to which bitmaps should be attached.
2. Create all bitmaps for these commits, stored in uncompressed format
in the PackBitmapIndexBuilder.
3. Deltify the bitmaps and write them to disk.

We investigated the process. For phase 2 it's most efficient to create
bitmaps starting with oldest commit and moving to the newest commit,
because the newer commits are able to reuse the work for the old ones.
But for bitmap deltification in phase 3, it's better when a newer
commit's bitmap is the base, and the current disk format writes bitmaps
out for the newest commits first.

This change introduces a new collection to hold the deltified and
compressed representations of the bitmaps, keeping a smaller subset of
commits in the PackBitmapIndexBuilder to help make the bitmap index
creation more memory efficient.

And in this commit, we're setting DISTANCE_THRESHOLD to 0 in the
PackWriterBitmapPreparer, which means the garbage collection will not
have much behavoir change and will still use as much memory as before.

Change-Id: I6ec2c3e8dde11805af47874d67d33cf1ef83660e
Signed-off-by: Yunjie Li <yunjieli@google.com>
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java
index d87b699..5666b57 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java
@@ -11,12 +11,13 @@
 package org.eclipse.jgit.internal.storage.file;
 
 import java.text.MessageFormat;
+import java.util.ArrayList;
 import java.util.Collections;
-import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
-import java.util.NoSuchElementException;
 
 import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.internal.storage.pack.BitmapCommit;
 import org.eclipse.jgit.internal.storage.pack.ObjectToPack;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.BitmapIndex.Bitmap;
@@ -39,8 +40,12 @@
 	private final EWAHCompressedBitmap blobs;
 	private final EWAHCompressedBitmap tags;
 	private final BlockList<PositionEntry> byOffset;
-	final BlockList<StoredBitmap>
-			byAddOrder = new BlockList<>();
+
+	private final LinkedList<StoredBitmap>
+			bitmapsToWriteXorBuffer = new LinkedList<>();
+
+	private List<StoredEntry> bitmapsToWrite = new ArrayList<>();
+
 	final ObjectIdOwnerMap<PositionEntry>
 			positionEntries = new ObjectIdOwnerMap<>();
 
@@ -136,6 +141,63 @@
 	}
 
 	/**
+	 * Processes a commit and prepares its bitmap to write to the bitmap index
+	 * file.
+	 *
+	 * @param c
+	 *            the commit corresponds to the bitmap.
+	 * @param bitmap
+	 *            the bitmap to be written.
+	 * @param flags
+	 *            the flags of the commit.
+	 */
+	public void processBitmapForWrite(BitmapCommit c, Bitmap bitmap,
+			int flags) {
+		EWAHCompressedBitmap compressed = bitmap.retrieveCompressed();
+		compressed.trim();
+		StoredBitmap newest = new StoredBitmap(c, compressed, null, flags);
+
+		bitmapsToWriteXorBuffer.add(newest);
+		if (bitmapsToWriteXorBuffer.size() > MAX_XOR_OFFSET_SEARCH) {
+			bitmapsToWrite.add(
+					generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst()));
+		}
+
+		if (c.isAddToIndex()) {
+			// The Bitmap map in the base class is used to make revwalks
+			// efficient, so only add bitmaps that keep it efficient without
+			// bloating memory.
+			addBitmap(c, bitmap, flags);
+		}
+	}
+
+	private StoredEntry generateStoredEntry(StoredBitmap bitmapToWrite) {
+		int bestXorOffset = 0;
+		EWAHCompressedBitmap bestBitmap = bitmapToWrite.getBitmap();
+
+		int offset = 1;
+		for (StoredBitmap curr : bitmapsToWriteXorBuffer) {
+			EWAHCompressedBitmap bitmap = curr.getBitmap()
+					.xor(bitmapToWrite.getBitmap());
+			if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) {
+				bestBitmap = bitmap;
+				bestXorOffset = offset;
+			}
+			offset++;
+		}
+
+		PositionEntry entry = positionEntries.get(bitmapToWrite);
+		if (entry == null) {
+			throw new IllegalStateException();
+		}
+		bestBitmap.trim();
+		StoredEntry result = new StoredEntry(entry.namePosition, bestBitmap,
+				bestXorOffset, bitmapToWrite.getFlags());
+
+		return result;
+	}
+
+	/**
 	 * Stores the bitmap for the objectId.
 	 *
 	 * @param objectId
@@ -150,7 +212,6 @@
 		bitmap.trim();
 		StoredBitmap result = new StoredBitmap(objectId, bitmap, null, flags);
 		getBitmaps().add(result);
-		byAddOrder.add(result);
 	}
 
 	/** {@inheritDoc} */
@@ -236,15 +297,18 @@
 	/** {@inheritDoc} */
 	@Override
 	public int getBitmapCount() {
-		return getBitmaps().size();
+		return bitmapsToWriteXorBuffer.size() + bitmapsToWrite.size();
 	}
 
 	/**
 	 * Remove all the bitmaps entries added.
+	 *
+	 * @param size
+	 *            the expected number of bitmap entries to be written.
 	 */
-	public void clearBitmaps() {
-		byAddOrder.clear();
+	public void resetBitmaps(int size) {
 		getBitmaps().clear();
+		bitmapsToWrite = new ArrayList<>(size);
 	}
 
 	/** {@inheritDoc} */
@@ -254,64 +318,18 @@
 	}
 
 	/**
-	 * Get an iterator over the xor compressed entries.
+	 * Get list of xor compressed entries that need to be written.
 	 *
-	 * @return an iterator over the xor compressed entries.
+	 * @return a list of the xor compressed entries.
 	 */
-	public Iterable<StoredEntry> getCompressedBitmaps() {
-		// Add order is from oldest to newest. The reverse add order is the
-		// output order.
-		return () -> new Iterator<StoredEntry>() {
+	public List<StoredEntry> getCompressedBitmaps() {
+		while (!bitmapsToWriteXorBuffer.isEmpty()) {
+			bitmapsToWrite.add(
+					generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst()));
+		}
 
-			private int index = byAddOrder.size() - 1;
-
-			@Override
-			public boolean hasNext() {
-				return index >= 0;
-			}
-
-			@Override
-			public StoredEntry next() {
-				if (!hasNext()) {
-					throw new NoSuchElementException();
-				}
-				StoredBitmap item = byAddOrder.get(index);
-				int bestXorOffset = 0;
-				EWAHCompressedBitmap bestBitmap = item.getBitmap();
-
-				// Attempt to compress the bitmap with an XOR of the
-				// previously written entries.
-				for (int i = 1; i <= MAX_XOR_OFFSET_SEARCH; i++) {
-					int curr = i + index;
-					if (curr >= byAddOrder.size()) {
-						break;
-					}
-
-					StoredBitmap other = byAddOrder.get(curr);
-					EWAHCompressedBitmap bitmap = other.getBitmap()
-							.xor(item.getBitmap());
-
-					if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) {
-						bestBitmap = bitmap;
-						bestXorOffset = i;
-					}
-				}
-				index--;
-
-				PositionEntry entry = positionEntries.get(item);
-				if (entry == null) {
-					throw new IllegalStateException();
-				}
-				bestBitmap.trim();
-				return new StoredEntry(entry.namePosition, bestBitmap,
-						bestXorOffset, item.getFlags());
-			}
-
-			@Override
-			public void remove() {
-				throw new UnsupportedOperationException();
-			}
-		};
+		Collections.reverse(bitmapsToWrite);
+		return bitmapsToWrite;
 	}
 
 	/** Data object for the on disk representation of a bitmap entry. */
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
index 0c965fe..824c62a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
@@ -2331,8 +2331,14 @@
 				throw new IllegalStateException(MessageFormat.format(
 						JGitText.get().bitmapMissingObject, cmit.name(),
 						last.name()));
-			last = cmit;
-			writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags());
+			last = BitmapCommit.copyFrom(cmit).build();
+			writeBitmaps.processBitmapForWrite(cmit, bitmap.build(),
+					cmit.getFlags());
+
+			// The bitmap walker should stop when the walk hits the previous
+			// commit, which saves time.
+			walker.setPrevCommit(last);
+			walker.setPrevBitmap(bitmap);
 
 			pm.update(1);
 		}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java
index 0377bcc..fefe565 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java
@@ -58,6 +58,8 @@
 
 	private static final int DAY_IN_SECONDS = 24 * 60 * 60;
 
+	private static final int DISTANCE_THRESHOLD = 0;
+
 	private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
 			RevCommit a, RevCommit b) -> Integer
 					.signum(b.getCommitTime() - a.getCommitTime());
@@ -244,6 +246,7 @@
 					// This commit is selected.
 					// Calculate where to look for the next one.
 					int flags = nextFlg;
+					int currDist = distanceFromTip;
 					nextIn = nextSpan(distanceFromTip);
 					nextFlg = nextIn == distantCommitSpan
 							? PackBitmapIndex.FLAG_REUSE
@@ -279,8 +282,17 @@
 						longestAncestorChain = new ArrayList<>();
 						chains.add(longestAncestorChain);
 					}
-					longestAncestorChain.add(new BitmapCommit(c,
-							!longestAncestorChain.isEmpty(), flags));
+
+					// The commit bc should reuse bitmap walker from its close
+					// ancestor. And the bitmap of bc should only be added to
+					// PackBitmapIndexBuilder when it's an old enough
+					// commit,i.e. distance from tip should be greater than
+					// DISTANCE_THRESHOLD, to save memory.
+					BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags)
+							.setAddToIndex(currDist >= DISTANCE_THRESHOLD)
+							.setReuseWalker(!longestAncestorChain.isEmpty())
+							.build();
+					longestAncestorChain.add(bc);
 					writeBitmaps.addBitmap(c, bitmap, 0);
 				}
 
@@ -288,12 +300,12 @@
 					selections.addAll(chain);
 				}
 			}
-			writeBitmaps.clearBitmaps(); // Remove the temporary commit bitmaps.
 
 			// Add the remaining peeledWant
 			for (AnyObjectId remainingWant : selectionHelper.newWants) {
 				selections.add(new BitmapCommit(remainingWant, false, 0));
 			}
+			writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps.
 
 			pm.endTask();
 			return selections;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java
index 023962e..aaecd23 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java
@@ -16,6 +16,7 @@
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.internal.revwalk.AddToBitmapFilter;
+import org.eclipse.jgit.internal.revwalk.AddToBitmapWithCacheFilter;
 import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.BitmapIndex;
@@ -41,6 +42,11 @@
 
 	private long countOfBitmapIndexMisses;
 
+	// Cached bitmap and commit to save walk time.
+	private AnyObjectId prevCommit;
+
+	private Bitmap prevBitmap;
+
 	/**
 	 * Create a BitmapWalker.
 	 *
@@ -56,6 +62,28 @@
 	}
 
 	/**
+	 * Set the cached commit for the walker.
+	 *
+	 * @param prevCommit
+	 *            the cached commit.
+	 * @since 5.7
+	 */
+	public void setPrevCommit(AnyObjectId prevCommit) {
+		this.prevCommit = prevCommit;
+	}
+
+	/**
+	 * Set the bitmap associated with the cached commit for the walker.
+	 *
+	 * @param prevBitmap
+	 *            the bitmap associated with the cached commit.
+	 * @since 5.7
+	 */
+	public void setPrevBitmap(Bitmap prevBitmap) {
+		this.prevBitmap = prevBitmap;
+	}
+
+	/**
 	 * Return the number of objects that had to be walked because they were not covered by a
 	 * bitmap.
 	 *
@@ -169,7 +197,10 @@
 		}
 
 		if (marked) {
-			if (seen == null) {
+			if (prevCommit != null) {
+				walker.setRevFilter(new AddToBitmapWithCacheFilter(prevCommit,
+						prevBitmap, bitmapResult));
+			} else if (seen == null) {
 				walker.setRevFilter(new AddToBitmapFilter(bitmapResult));
 			} else {
 				walker.setRevFilter(