ApplyCommand: add a stream to apply a delta patch

Add a new BinaryDeltaInputStream that applies a delta provided by
another InputStream to a given base. Because delta application needs
random access to the base, the base itself cannot be yet another
InputStream. But at least this enables streaming of the result.

Add a simple test using delta hunks generated by C git.

Bug: 371725
Change-Id: Ibd26fa2f49860737ad5c5387f7f4870d3e85e628
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward
new file mode 100644
index 0000000..878b167
--- /dev/null
+++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.forward
@@ -0,0 +1 @@
+ScmZp0Xmwa1z*+$U3j_csN(Dmz
diff --git a/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse
new file mode 100644
index 0000000..7ff7a08
--- /dev/null
+++ b/org.eclipse.jgit.test/tst-rsrc/org/eclipse/jgit/util/io/delta1.reverse
@@ -0,0 +1 @@
+TcmZp5XmD5{u!xa=5hEi28?FP4
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java
new file mode 100644
index 0000000..d9297fc
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/BinaryDeltaInputStreamTest.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.zip.InflaterInputStream;
+
+import org.junit.Test;
+
+/**
+ * Crude tests for the {@link BinaryDeltaInputStream} using delta diffs
+ * generated by C git.
+ */
+public class BinaryDeltaInputStreamTest {
+
+	private InputStream getBinaryHunk(String name) {
+		return this.getClass().getResourceAsStream(name);
+	}
+
+	@Test
+	public void testBinaryDelta() throws Exception {
+		// Prepare our test data
+		byte[] data = new byte[8192];
+		for (int i = 0; i < data.length; i++) {
+			data[i] = (byte) (255 - (i % 256));
+		}
+		// Same, but with five 'x' inserted in the middle.
+		int middle = data.length / 2;
+		byte[] newData = new byte[data.length + 5];
+		System.arraycopy(data, 0, newData, 0, middle);
+		for (int i = 0; i < 5; i++) {
+			newData[middle + i] = 'x';
+		}
+		System.arraycopy(data, middle, newData, middle + 5, middle);
+		// delta1.forward has the instructions
+		// @formatter:off
+		// COPY 0 4096
+		// INSERT 5 xxxxx
+		// COPY 0 4096
+		// @formatter:on
+		// Note that the way we built newData could be expressed as
+		// @formatter:off
+		// COPY 0 4096
+		// INSERT 5 xxxxx
+		// COPY 4096 4096
+		// @formatter:on
+		try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+				BinaryDeltaInputStream input = new BinaryDeltaInputStream(data,
+						new InflaterInputStream(new BinaryHunkInputStream(
+								getBinaryHunk("delta1.forward"))))) {
+			byte[] buf = new byte[1024];
+			int n;
+			while ((n = input.read(buf)) >= 0) {
+				out.write(buf, 0, n);
+			}
+			assertArrayEquals(newData, out.toByteArray());
+			assertTrue(input.isFullyConsumed());
+		}
+		// delta1.reverse has the instructions
+		// @formatter:off
+		// COPY 0 4096
+		// COPY 256 3840
+		// COPY 256 256
+		// @formatter:on
+		// Note that there are alternatives, for instance
+		// @formatter:off
+		// COPY 0 4096
+		// COPY 4101 4096
+		// @formatter:on
+		// or
+		// @formatter:off
+		// COPY 0 4096
+		// COPY 0 4096
+		// @formatter:on
+		try (ByteArrayOutputStream out = new ByteArrayOutputStream();
+				BinaryDeltaInputStream input = new BinaryDeltaInputStream(
+						newData,
+						new InflaterInputStream(new BinaryHunkInputStream(
+								getBinaryHunk("delta1.reverse"))))) {
+			long expectedSize = input.getExpectedResultSize();
+			assertEquals(data.length, expectedSize);
+			byte[] buf = new byte[1024];
+			int n;
+			while ((n = input.read(buf)) >= 0) {
+				out.write(buf, 0, n);
+			}
+			assertArrayEquals(data, out.toByteArray());
+			assertTrue(input.isFullyConsumed());
+		}
+	}
+}
diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
index f8c9ea0..8b6872d 100644
--- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
+++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
@@ -43,6 +43,9 @@
 base85tooLong=Extra base-85 encoded data for output size of {0} bytes
 base85tooShort=Base-85 data decoded into less than {0} bytes
 baseLengthIncorrect=base length incorrect
+binaryDeltaBaseLengthMismatch=Binary delta base length does not match, expected {0}, got {1}
+binaryDeltaInvalidOffset=Binary delta offset + length too large: {0} + {1}
+binaryDeltaInvalidResultLength=Binary delta expected result length is negative
 binaryHunkDecodeError=Binary hunk, line {0}: invalid input
 binaryHunkInvalidLength=Binary hunk, line {0}: input corrupt; expected length byte, got 0x{1}
 binaryHunkLineTooShort=Binary hunk, line {0}: input ended prematurely
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
index 85b40cb..3b67b0f 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
@@ -71,6 +71,9 @@
 	/***/ public String base85tooLong;
 	/***/ public String base85tooShort;
 	/***/ public String baseLengthIncorrect;
+	/***/ public String binaryDeltaBaseLengthMismatch;
+	/***/ public String binaryDeltaInvalidOffset;
+	/***/ public String binaryDeltaInvalidResultLength;
 	/***/ public String binaryHunkDecodeError;
 	/***/ public String binaryHunkInvalidLength;
 	/***/ public String binaryHunkLineTooShort;
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java
new file mode 100644
index 0000000..7f0d87f
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/BinaryDeltaInputStream.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.util.io;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StreamCorruptedException;
+import java.text.MessageFormat;
+
+import org.eclipse.jgit.internal.JGitText;
+
+/**
+ * An {@link InputStream} that applies a binary delta to a base on the fly.
+ * <p>
+ * Delta application to a base needs random access to the base data. The delta
+ * is expressed as a sequence of copy and insert instructions. A copy
+ * instruction has the form "COPY fromOffset length" and says "copy length bytes
+ * from the base, starting at offset fromOffset, to the result". An insert
+ * instruction has the form "INSERT length" followed by length bytes and says
+ * "copy the next length bytes from the delta to the result".
+ * </p>
+ * <p>
+ * These instructions are generated using a content-defined chunking algorithm
+ * (currently C git uses the standard Rabin variant; but there are others that
+ * could be used) that identifies equal chunks. It is entirely possible that a
+ * later copy instruction has a fromOffset that is before the fromOffset of an
+ * earlier copy instruction.
+ * </p>
+ * <p>
+ * This makes it impossible to stream the base.
+ * </p>
+ * <p>
+ * JGit is limited to 2GB maximum size for the base since array indices are
+ * signed 32bit values.
+ *
+ * @since 5.12
+ */
+public class BinaryDeltaInputStream extends InputStream {
+
+	private final byte[] base;
+
+	private final InputStream delta;
+
+	private long resultLength;
+
+	private long toDeliver = -1;
+
+	private int fromBase;
+
+	private int fromDelta;
+
+	private int baseOffset = -1;
+
+	/**
+	 * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
+	 * to {@code base}.
+	 *
+	 * @param base
+	 *            data to apply the delta to
+	 * @param delta
+	 *            {@link InputStream} delivering the delta to apply
+	 */
+	public BinaryDeltaInputStream(byte[] base, InputStream delta) {
+		this.base = base;
+		this.delta = delta;
+	}
+
+	@Override
+	public int read() throws IOException {
+		int b = readNext();
+		if (b >= 0) {
+			toDeliver--;
+		}
+		return b;
+	}
+
+	private void initialize() throws IOException {
+		long baseSize = readVarInt(delta);
+		if (baseSize > Integer.MAX_VALUE || baseSize < 0
+				|| (int) baseSize != base.length) {
+			throw new IOException(MessageFormat.format(
+					JGitText.get().binaryDeltaBaseLengthMismatch,
+					Integer.valueOf(base.length), Long.valueOf(baseSize)));
+		}
+		resultLength = readVarInt(delta);
+		if (resultLength < 0) {
+			throw new StreamCorruptedException(
+					JGitText.get().binaryDeltaInvalidResultLength);
+		}
+		toDeliver = resultLength;
+		baseOffset = 0;
+	}
+
+	private int readNext() throws IOException {
+		if (baseOffset < 0) {
+			initialize();
+		}
+		if (fromBase > 0) {
+			fromBase--;
+			return base[baseOffset++] & 0xFF;
+		} else if (fromDelta > 0) {
+			fromDelta--;
+			return delta.read();
+		}
+		int command = delta.read();
+		if (command < 0) {
+			return -1;
+		}
+		if ((command & 0x80) != 0) {
+			// Decode offset and length to read from base
+			long copyOffset = 0;
+			for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
+				if ((command & i) != 0) {
+					copyOffset |= ((long) next(delta)) << shift;
+				}
+			}
+			int copySize = 0;
+			for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
+				if ((command & i) != 0) {
+					copySize |= next(delta) << shift;
+				}
+			}
+			if (copySize == 0) {
+				copySize = 0x10000;
+			}
+			if (copyOffset > base.length - copySize) {
+				throw new StreamCorruptedException(MessageFormat.format(
+						JGitText.get().binaryDeltaInvalidOffset,
+						Long.valueOf(copyOffset), Integer.valueOf(copySize)));
+			}
+			baseOffset = (int) copyOffset;
+			fromBase = copySize;
+			return readNext();
+		} else if (command != 0) {
+			// The next 'command' bytes come from the delta
+			fromDelta = command - 1;
+			return delta.read();
+		} else {
+			// Zero is reserved
+			throw new StreamCorruptedException(
+					JGitText.get().unsupportedCommand0);
+		}
+	}
+
+	private int next(InputStream in) throws IOException {
+		int b = in.read();
+		if (b < 0) {
+			throw new EOFException();
+		}
+		return b;
+	}
+
+	private long readVarInt(InputStream in) throws IOException {
+		long val = 0;
+		int shift = 0;
+		int b;
+		do {
+			b = next(in);
+			val |= ((long) (b & 0x7f)) << shift;
+			shift += 7;
+		} while ((b & 0x80) != 0);
+		return val;
+	}
+
+	/**
+	 * Tells the expected size of the final result.
+	 *
+	 * @return the size
+	 * @throws IOException
+	 *             if the size cannot be determined from {@code delta}
+	 */
+	public long getExpectedResultSize() throws IOException {
+		if (baseOffset < 0) {
+			initialize();
+		}
+		return resultLength;
+	}
+
+	/**
+	 * Tells whether the delta has been fully consumed, and the expected number
+	 * of bytes for the combined result have been read from this
+	 * {@link BinaryDeltaInputStream}.
+	 *
+	 * @return whether delta application was successful
+	 */
+	public boolean isFullyConsumed() {
+		try {
+			return toDeliver == 0 && delta.read() < 0;
+		} catch (IOException e) {
+			return toDeliver == 0;
+		}
+	}
+
+	@Override
+	public void close() throws IOException {
+		delta.close();
+	}
+}