ApplyCommand: add streams to read/write binary patch hunks

Add streams that can encode or decode git binary patch data on the fly.
Git writes binary patches base-85 encoded, at most 52 unencoded bytes
per line, with the unencoded data length prefixed in a one-character
encoding, and suffixed with a newline character.

Add a test for both the new input and the output stream. The test
roundtrips binary data of different lengths in different ways.

Bug: 371725
Change-Id: Ic3faebaa4637520f5448b3d1acd78d5aaab3907a
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryHunkStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryHunkStreamTest.java
new file mode 100644
index 0000000..b198c32
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryHunkStreamTest.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.junit.Test;
+
+/**
+ * Tests for {@link BinaryHunkInputStream} and {@link BinaryHunkOutputStream}.
+ */
+public class BinaryHunkStreamTest {
+
+	/**
+	 * Creates test data counting down from 255, wrapping every 256 bytes.
+	 *
+	 * @param length
+	 *            number of bytes to create
+	 * @return the test data
+	 */
+	private static byte[] createData(int length) {
+		byte[] data = new byte[length];
+		for (int i = 0; i < data.length; i++) {
+			data[i] = (byte) (255 - (i % 256));
+		}
+		return data;
+	}
+
+	/** Writes all data in one call and reads it back in one call. */
+	@Test
+	public void testRoundtripWholeBuffer() throws IOException {
+		for (int length = 1; length < 520 + 52; length++) {
+			byte[] data = createData(length);
+			try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
+					BinaryHunkOutputStream out = new BinaryHunkOutputStream(
+							bos)) {
+				out.write(data);
+				out.flush();
+				byte[] encoded = bos.toByteArray();
+				assertFalse(Arrays.equals(data, encoded));
+				try (BinaryHunkInputStream in = new BinaryHunkInputStream(
+						new ByteArrayInputStream(encoded))) {
+					byte[] decoded = new byte[data.length];
+					int newLength = in.read(decoded);
+					assertEquals(decoded.length, newLength);
+					assertEquals(-1, in.read());
+					assertArrayEquals(data, decoded);
+				}
+			}
+		}
+	}
+
+	/** Writes the data in two halves and reads it back in chunks. */
+	@Test
+	public void testRoundtripChunks() throws IOException {
+		for (int length = 1; length < 520 + 52; length++) {
+			byte[] data = createData(length);
+			try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
+					BinaryHunkOutputStream out = new BinaryHunkOutputStream(
+							bos)) {
+				out.write(data, 0, data.length / 2);
+				out.write(data, data.length / 2, data.length - data.length / 2);
+				out.flush();
+				byte[] encoded = bos.toByteArray();
+				assertFalse(Arrays.equals(data, encoded));
+				try (BinaryHunkInputStream in = new BinaryHunkInputStream(
+						new ByteArrayInputStream(encoded))) {
+					byte[] decoded = new byte[data.length];
+					int p = 0;
+					int n;
+					while ((n = in.read(decoded, p,
+							Math.min(decoded.length - p, 57))) >= 0) {
+						p += n;
+						if (p == decoded.length) {
+							break;
+						}
+					}
+					assertEquals(decoded.length, p);
+					assertEquals(-1, in.read());
+					assertArrayEquals(data, decoded);
+				}
+			}
+		}
+	}
+
+	/** Writes and reads the data one byte at a time. */
+	@Test
+	public void testRoundtripBytes() throws IOException {
+		for (int length = 1; length < 520 + 52; length++) {
+			byte[] data = createData(length);
+			try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
+					BinaryHunkOutputStream out = new BinaryHunkOutputStream(
+							bos)) {
+				for (int i = 0; i < data.length; i++) {
+					out.write(data[i]);
+				}
+				out.flush();
+				byte[] encoded = bos.toByteArray();
+				assertFalse(Arrays.equals(data, encoded));
+				try (BinaryHunkInputStream in = new BinaryHunkInputStream(
+						new ByteArrayInputStream(encoded))) {
+					byte[] decoded = new byte[data.length];
+					for (int i = 0; i < decoded.length; i++) {
+						int val = in.read();
+						assertTrue(0 <= val && val <= 255);
+						decoded[i] = (byte) val;
+					}
+					assertEquals(-1, in.read());
+					assertArrayEquals(data, decoded);
+				}
+			}
+		}
+	}
+
+	/** Relies on close() instead of flush() to write buffered data. */
+	@Test
+	public void testRoundtripWithClose() throws IOException {
+		for (int length = 1; length < 520 + 52; length++) {
+			byte[] data = createData(length);
+			try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
+				try (BinaryHunkOutputStream out = new BinaryHunkOutputStream(
+						bos)) {
+					out.write(data);
+				}
+				byte[] encoded = bos.toByteArray();
+				assertFalse(Arrays.equals(data, encoded));
+				try (BinaryHunkInputStream in = new BinaryHunkInputStream(
+						new ByteArrayInputStream(encoded))) {
+					byte[] decoded = new byte[data.length];
+					int newLength = in.read(decoded);
+					assertEquals(decoded.length, newLength);
+					assertEquals(-1, in.read());
+					assertArrayEquals(data, decoded);
+				}
+			}
+		}
+	}
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index 6c4ca52..f8c9ea0 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -43,6 +43,10 @@
 base85tooLong=Extra base-85 encoded data for output size of {0} bytes
 base85tooShort=Base-85 data decoded into less than {0} bytes
 baseLengthIncorrect=base length incorrect
+binaryHunkDecodeError=Binary hunk, line {0}: invalid input
+binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1}
+binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely
+binaryHunkMissingNewline=Binary hunk, line {0}: input line not terminated by newline
 bitmapMissingObject=Bitmap at {0} is missing {1}.
 bitmapsMustBePrepared=Bitmaps must be prepared before they may be written.
 blameNotCommittedYet=Not Committed Yet
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index 5c194f3..85b40cb 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -71,6 +71,10 @@
 	/***/ public String base85tooLong;
 	/***/ public String base85tooShort;
 	/***/ public String baseLengthIncorrect;
+	/***/ public String binaryHunkDecodeError;
+	/***/ public String binaryHunkInvalidLength;
+	/***/ public String binaryHunkLineTooShort;
+	/***/ public String binaryHunkMissingNewline;
 	/***/ public String bitmapMissingObject;
 	/***/ public String bitmapsMustBePrepared;
 	/***/ public String blameNotCommittedYet;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkInputStream.java
new file mode 100644
index 0000000..57b2d7a
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkInputStream.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StreamCorruptedException;
+import java.text.MessageFormat;
+
+import org.eclipse.jgit.internal.JGitText;
+import org.eclipse.jgit.util.Base85;
+
+/**
+ * A stream that decodes git binary patch data on the fly.
+ * <p>
+ * Each input line consists of one character giving the number of decoded
+ * bytes on that line, the base-85 encoded data, and a terminating newline.
+ * </p>
+ *
+ * @since 5.12
+ */
+public class BinaryHunkInputStream extends InputStream {
+
+	private final InputStream in;
+
+	/** Number of the line currently being decoded; used in error messages. */
+	private int lineNumber;
+
+	/** Decoded bytes of the current line; {@code null} if none. */
+	private byte[] buffer;
+
+	/** Next index to read in {@link #buffer}; negative once EOF was seen. */
+	private int pos = 0;
+
+	/**
+	 * Creates a new {@link BinaryHunkInputStream}.
+	 *
+	 * @param in
+	 *            {@link InputStream} to read the base-85 encoded patch data
+	 *            from
+	 */
+	public BinaryHunkInputStream(InputStream in) {
+		this.in = in;
+	}
+
+	/**
+	 * Reads the next decoded byte, decoding a further input line if needed.
+	 *
+	 * @return the byte as a value in 0..255, or -1 at the end of the input
+	 * @throws IOException
+	 *             if the underlying stream fails or the input is malformed
+	 */
+	@Override
+	public int read() throws IOException {
+		if (pos < 0) {
+			return -1;
+		}
+		if (buffer == null || pos == buffer.length) {
+			fillBuffer();
+		}
+		if (pos >= 0) {
+			return buffer[pos++] & 0xFF;
+		}
+		return -1;
+	}
+
+	/**
+	 * Closes the underlying {@link InputStream} and drops the decoded buffer.
+	 */
+	@Override
+	public void close() throws IOException {
+		in.close();
+		buffer = null;
+	}
+
+	/**
+	 * Reads and decodes the next input line into {@link #buffer}, or sets
+	 * {@link #pos} to a negative value if the input is exhausted.
+	 *
+	 * @throws IOException
+	 *             if the line is malformed or the underlying stream fails
+	 */
+	private void fillBuffer() throws IOException {
+		int length = in.read();
+		if (length < 0) {
+			pos = length;
+			buffer = null;
+			return;
+		}
+		lineNumber++;
+		// Length is encoded with characters, A..Z for 1..26 and a..z for 27..52
+		if ('A' <= length && length <= 'Z') {
+			length = length - 'A' + 1;
+		} else if ('a' <= length && length <= 'z') {
+			length = length - 'a' + 27;
+		} else {
+			throw new StreamCorruptedException(MessageFormat.format(
+					JGitText.get().binaryHunkInvalidLength,
+					Integer.valueOf(lineNumber), Integer.toHexString(length)));
+		}
+		byte[] encoded = new byte[Base85.encodedLength(length)];
+		for (int i = 0; i < encoded.length; i++) {
+			int b = in.read();
+			if (b < 0 || b == '\n') {
+				// The line ended before all expected data was read.
+				throw new EOFException(MessageFormat.format(
+						JGitText.get().binaryHunkLineTooShort,
+						Integer.valueOf(lineNumber)));
+			}
+			encoded[i] = (byte) b;
+		}
+		// Must be followed by a newline; tolerate EOF.
+		int b = in.read();
+		if (b >= 0 && b != '\n') {
+			throw new StreamCorruptedException(MessageFormat.format(
+					JGitText.get().binaryHunkMissingNewline,
+					Integer.valueOf(lineNumber)));
+		}
+		try {
+			buffer = Base85.decode(encoded, length);
+		} catch (IllegalArgumentException e) {
+			StreamCorruptedException ex = new StreamCorruptedException(
+					MessageFormat.format(JGitText.get().binaryHunkDecodeError,
+							Integer.valueOf(lineNumber)));
+			ex.initCause(e);
+			throw ex;
+		}
+		pos = 0;
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkOutputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkOutputStream.java
new file mode 100644
index 0000000..30551c0
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryHunkOutputStream.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.eclipse.jgit.util.Base85;
+
+/**
+ * An {@link OutputStream} that encodes data for a git binary patch.
+ * <p>
+ * Data is written in lines of at most 52 input bytes each: one character
+ * giving the number of input bytes, the base-85 encoded data, and a newline.
+ * </p>
+ *
+ * @since 5.12
+ */
+public class BinaryHunkOutputStream extends OutputStream {
+
+	private static final int MAX_BYTES = 52;
+
+	private final OutputStream out;
+
+	/** Collects bytes not yet written as an encoded line. */
+	private final byte[] buffer = new byte[MAX_BYTES];
+
+	/** Number of valid bytes in {@link #buffer}. */
+	private int pos;
+
+	/**
+	 * Creates a new {@link BinaryHunkOutputStream}.
+	 *
+	 * @param out
+	 *            {@link OutputStream} to write the encoded data to
+	 */
+	public BinaryHunkOutputStream(OutputStream out) {
+		this.out = out;
+	}
+
+	/**
+	 * Flushes and closes this stream, and closes the underlying
+	 * {@link OutputStream}. The underlying stream is closed even if flushing
+	 * fails.
+	 */
+	@Override
+	public void close() throws IOException {
+		try {
+			flush();
+		} finally {
+			out.close();
+		}
+	}
+
+	/**
+	 * Writes any buffered output as a binary patch line to the underlying
+	 * {@link OutputStream} and flushes that stream, too.
+	 */
+	@Override
+	public void flush() throws IOException {
+		if (pos > 0) {
+			encode(buffer, 0, pos);
+			pos = 0;
+		}
+		out.flush();
+	}
+
+	/**
+	 * Buffers a single byte; a line is written once 52 bytes are buffered.
+	 */
+	@Override
+	public void write(int b) throws IOException {
+		buffer[pos++] = (byte) b;
+		if (pos == buffer.length) {
+			encode(buffer, 0, pos);
+			pos = 0;
+		}
+	}
+
+	/**
+	 * Writes {@code len} bytes from {@code b}, starting at {@code off}. A
+	 * partially filled buffer is topped up first; then full lines are encoded
+	 * directly from {@code b}, and a trailing partial line is buffered until
+	 * more data arrives or the stream is flushed.
+	 */
+	@Override
+	public void write(byte[] b, int off, int len) throws IOException {
+		if (len == 0) {
+			return;
+		}
+		int toCopy = len;
+		int in = off;
+		if (pos > 0) {
+			// Fill the buffer
+			int chunk = Math.min(toCopy, buffer.length - pos);
+			System.arraycopy(b, in, buffer, pos, chunk);
+			in += chunk;
+			pos += chunk;
+			toCopy -= chunk;
+			if (pos == buffer.length) {
+				encode(buffer, 0, pos);
+				pos = 0;
+			}
+			if (toCopy == 0) {
+				return;
+			}
+		}
+		while (toCopy >= MAX_BYTES) {
+			encode(b, in, MAX_BYTES);
+			toCopy -= MAX_BYTES;
+			in += MAX_BYTES;
+		}
+		if (toCopy > 0) {
+			System.arraycopy(b, in, buffer, 0, toCopy);
+			pos = toCopy;
+		}
+	}
+
+	/**
+	 * Writes one binary patch line: the length character (A..Z for 1..26,
+	 * a..z for 27..52), the base-85 encoded data, and a newline.
+	 */
+	private void encode(byte[] data, int off, int length) throws IOException {
+		if (length <= 26) {
+			out.write('A' + length - 1);
+		} else {
+			out.write('a' + length - 27);
+		}
+		out.write(Base85.encode(data, off, length));
+		out.write('\n');
+	}
+}