Added check for binary files while diffing

Added a check in Diff so that files that are most likely not text
are not diffed line by line. A file is treated as binary if a NUL
byte appears within its first 8000 bytes. This is similar to the
heuristic that C Git uses.

Change-Id: I2b6f05674c88d89b3f549a5db483f850f7f46c26
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
index 931c46d..fc1e400 100644
--- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
@@ -132,16 +132,28 @@ protected void outputDiff(PrintStream out, String path,
 			+ (mode1.equals(mode2) ? " " + mode1 : ""));
 		out.println("--- " + (isNew ?  "/dev/null" : name1));
 		out.println("+++ " + (isDelete ?  "/dev/null" : name2));
-		RawText a = getRawText(id1);
-		RawText b = getRawText(id2);
+
+		byte[] aRaw = getRawBytes(id1);
+		byte[] bRaw = getRawBytes(id2);
+
+		if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
+			out.println("Binary files differ");
+			return;
+		}
+
+		RawText a = getRawText(aRaw);
+		RawText b = getRawText(bRaw);
 		MyersDiff diff = new MyersDiff(a, b);
 		fmt.formatEdits(out, a, b, diff.getEdits());
 	}
 
-	private RawText getRawText(ObjectId id) throws IOException {
+	private byte[] getRawBytes(ObjectId id) throws IOException {
 		if (id.equals(ObjectId.zeroId()))
-			return new RawText(new byte[] {});
-		byte[] raw = db.openBlob(id).getCachedBytes();
+			return new byte[] {};
+		return db.openBlob(id).getCachedBytes();
+	}
+
+	private RawText getRawText(byte[] raw) {
 		if (ignoreWsAll)
 			return new RawTextIgnoreAllWhitespace(raw);
 		else if (ignoreWsTrailing)
@@ -154,4 +166,3 @@ else if (ignoreWsLeading)
 			return new RawText(raw);
 	}
 }
-
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index c785534..c01cb7a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -65,6 +65,9 @@
  * they are converting from "line number" to "element index".
  */
 public class RawText implements Sequence {
+	/** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */
+	private static final int FIRST_FEW_BYTES = 8000;
+
 	/** The file content for this sequence. */
 	protected final byte[] content;
 
@@ -202,4 +205,22 @@ protected int hashLine(final byte[] raw, int ptr, final int end) {
 			hash = (hash << 5) ^ (raw[ptr] & 0xff);
 		return hash;
 	}
+
+	/**
+	 * Guess whether the given bytes are binary rather than text by looking
+	 * for a NUL byte within the first {@code FIRST_FEW_BYTES} bytes.
+	 *
+	 * @param raw
+	 *            the raw file content to inspect; must not be null.
+	 * @return true if a NUL byte was found in the inspected prefix
+	 */
+	public static boolean isBinary(byte[] raw) {
+		// NUL within the leading window => binary (same heuristic as C Git)
+		final int limit = Math.min(raw.length, FIRST_FEW_BYTES);
+		for (int i = 0; i < limit; i++) {
+			if (raw[i] == 0)
+				return true;
+		}
+		return false;
+	}
 }