Skip detecting content renames for binary files

This is similar to change Idbc2c29bd that skipped detecting content
renames for large files. With this change, we added a new option in
RenameDetector called "skipContentRenamesForBinaryFiles", that when set,
causes binary files with any slight modification to be identified as
added/deleted. The default for this boolean is false, so preserving
current behaviour.

Change-Id: I4770b1f69c60b1037025ddd0940ba86df6047299
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
index 2ea3cd7..5edb60c 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
@@ -580,6 +580,57 @@
 	}
 
 	@Test
+	public void testExactRenameForBinaryFile_isIdentified() throws Exception {
+		ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, aId);
+
+		rd.add(a);
+		rd.add(b);
+
+		List<DiffEntry> entries = rd.compute();
+		assertEquals(1, entries.size());
+		assertRename(b, a, 100, entries.get(0));
+	}
+
+	@Test
+	public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception {
+		ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+		ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
+
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		rd.add(a);
+		rd.add(b);
+		rd.setRenameScore(40);
+
+		List<DiffEntry> entries = rd.compute();
+		assertEquals(1, entries.size());
+		assertRename(b, a, 50, entries.get(0));
+	}
+
+	@Test
+	public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception {
+		ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+		ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
+
+		DiffEntry a = DiffEntry.add(PATH_A, aId);
+		DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+		rd.add(a);
+		rd.add(b);
+		rd.setRenameScore(40);
+		rd.setSkipContentRenamesForBinaryFiles(true);
+
+		List<DiffEntry> entries = rd.compute();
+		assertEquals(2, entries.size());
+		assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0));
+		assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
+	}
+
+	@Test
 	public void testSetRenameScore_IllegalArgs() throws Exception {
 		try {
 			rd.setRenameScore(-1);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
index 75784c2..ba1f63b 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
@@ -104,6 +104,13 @@
 	 */
 	private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
 
+	/**
+	 * Skip detecting content renames for binary files. Content renames are
+	 * those that are not exact, that is with a slight content modification
+	 * between the two files.
+	 */
+	private boolean skipContentRenamesForBinaryFiles = false;
+
 	/** Set if the number of adds or deletes was over the limit. */
 	private boolean overRenameLimit;
 
@@ -236,6 +243,26 @@
 	}
 
 	/**
+	 * Get skipping detecting content renames for binary files.
+	 *
+	 * @return true if content renames should be skipped for binary files, false otherwise.
+	 * @since 5.12
+	 */
+	public boolean getSkipContentRenamesForBinaryFiles() {
+		return skipContentRenamesForBinaryFiles;
+	}
+
+	/**
+	 * Sets skipping detecting content renames for binary files.
+	 *
+	 * @param value true if content renames should be skipped for binary files, false otherwise.
+	 * @since 5.12
+	 */
+	public void setSkipContentRenamesForBinaryFiles(boolean value) {
+		this.skipContentRenamesForBinaryFiles = value;
+	}
+
+	/**
 	 * Check if the detector is over the rename limit.
 	 * <p>
 	 * This method can be invoked either before or after {@code getEntries} has
@@ -521,6 +548,7 @@
 			d = new SimilarityRenameDetector(reader, deleted, added);
 			d.setRenameScore(getRenameScore());
 			d.setBigFileThreshold(getBigFileThreshold());
+			d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
 			d.compute(pm);
 			overRenameLimit |= d.isTableOverflow();
 			deleted = d.getLeftOverSources();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index fb6e5df..661369b 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -102,6 +102,15 @@
 		idGrowAt = growAt(idHashBits);
 	}
 
+	static boolean isBinary(ObjectLoader obj) throws IOException {
+		if (obj.isLarge()) {
+			try (ObjectStream in1 = obj.openStream()) {
+				return RawText.isBinary(in1);
+			}
+		}
+		return RawText.isBinary(obj.getCachedBytes());
+	}
+
 	void hash(ObjectLoader obj) throws MissingObjectException, IOException,
 			TableFullException {
 		if (obj.isLarge()) {
@@ -115,9 +124,7 @@
 	private void hashLargeObject(ObjectLoader obj) throws IOException,
 			TableFullException {
 		boolean text;
-		try (ObjectStream in1 = obj.openStream()) {
-			text = !RawText.isBinary(in1);
-		}
+		text = !isBinary(obj);
 
 		try (ObjectStream in2 = obj.openStream()) {
 			hash(in2, in2.getSize(), text);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index 082f31d..5871b4a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -26,6 +26,7 @@
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.ProgressMonitor;
 
 class SimilarityRenameDetector {
@@ -87,6 +88,9 @@
 	 */
 	private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
 
+	/** Skip content renames for binary files. */
+	private boolean skipBinaryFiles = false;
+
 	/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
 	private boolean tableOverflow;
 
@@ -107,6 +111,10 @@
 		bigFileThreshold = threshold;
 	}
 
+	void setSkipBinaryFiles(boolean value) {
+		skipBinaryFiles = value;
+	}
+
 	void compute(ProgressMonitor pm) throws IOException, CancelledException {
 		if (pm == null)
 			pm = NullProgressMonitor.INSTANCE;
@@ -271,7 +279,12 @@
 
 				if (s == null) {
 					try {
-						s = hash(OLD, srcEnt);
+						ObjectLoader loader = reader.open(OLD, srcEnt);
+						if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
+							pm.update(1);
+							continue SRC;
+						}
+						s = hash(loader);
 					} catch (TableFullException tableFull) {
 						tableOverflow = true;
 						continue SRC;
@@ -280,7 +293,12 @@
 
 				SimilarityIndex d;
 				try {
-					d = hash(NEW, dstEnt);
+					ObjectLoader loader = reader.open(NEW, dstEnt);
+					if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
+						pm.update(1);
+						continue;
+					}
+					d = hash(loader);
 				} catch (TableFullException tableFull) {
 					if (dstTooLarge == null)
 						dstTooLarge = new BitSet(dsts.size());
@@ -364,10 +382,10 @@
 		return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
 	}
 
-	private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
+	private SimilarityIndex hash(ObjectLoader objectLoader)
 			throws IOException, TableFullException {
 		SimilarityIndex r = new SimilarityIndex();
-		r.hash(reader.open(side, ent));
+		r.hash(objectLoader);
 		r.sort();
 		return r;
 	}