Add the "compression-level" option to all ArchiveCommand formats

Different archive formats support a compression level in the range
[0-9]. The value 0 selects the lowest compression and 9 the highest.
Higher levels produce smaller output files but require more memory to
do the compression.

This change allows passing a "compression-level" option to the git
archive command and implements using it for different file formats.

Change-Id: I5758f691c37ba630dbac24db67bb7da827bbc8e1
Signed-off-by: Youssef Elghareeb <ghareeb@google.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
diff --git a/org.eclipse.jgit.archive/.settings/.api_filters b/org.eclipse.jgit.archive/.settings/.api_filters
new file mode 100644
index 0000000..f4a934a
--- /dev/null
+++ b/org.eclipse.jgit.archive/.settings/.api_filters
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<component id="org.eclipse.jgit.archive" version="2">
+    <resource path="src/org/eclipse/jgit/archive/BaseFormat.java" type="org.eclipse.jgit.archive.BaseFormat">
+        <filter id="336658481">
+            <message_arguments>
+                <message_argument value="org.eclipse.jgit.archive.BaseFormat"/>
+                <message_argument value="COMPRESSION_LEVEL"/>
+            </message_arguments>
+        </filter>
+    </resource>
+</component>
diff --git a/org.eclipse.jgit.archive/resources/org/eclipse/jgit/archive/internal/ArchiveText.properties b/org.eclipse.jgit.archive/resources/org/eclipse/jgit/archive/internal/ArchiveText.properties
index 3b50bb4..e6e1227 100644
--- a/org.eclipse.jgit.archive/resources/org/eclipse/jgit/archive/internal/ArchiveText.properties
+++ b/org.eclipse.jgit.archive/resources/org/eclipse/jgit/archive/internal/ArchiveText.properties
@@ -1,3 +1,4 @@
 cannotSetOption=Cannot set option: {0}
+invalidCompressionLevel=Invalid compression level: {0}
 pathDoesNotMatchMode=Path {0} does not match mode {1}
 unsupportedMode=Unsupported mode {0}
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/BaseFormat.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/BaseFormat.java
index 27f001e..0ebac77 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/BaseFormat.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/BaseFormat.java
@@ -25,6 +25,11 @@
  * @since 4.0
  */
 public class BaseFormat {
+	/**
+	 * Compression-level for the archive file. Only values in [0-9] are allowed.
+	 * @since 5.11
+	 */
+	protected static final String COMPRESSION_LEVEL = "compression-level"; //$NON-NLS-1$
 
 	/**
 	 * Apply options to archive output stream
@@ -40,6 +45,9 @@
 			Map<String, Object> o) throws IOException {
 		for (Map.Entry<String, Object> p : o.entrySet()) {
 			try {
+				if (p.getKey().equals(COMPRESSION_LEVEL)) {
+					continue;
+				}
 				new Statement(s, "set" + StringUtils.capitalize(p.getKey()), //$NON-NLS-1$
 						new Object[] { p.getValue() }).execute();
 			} catch (Exception e) {
@@ -49,4 +57,32 @@
 		}
 		return s;
 	}
+
+	/**
+	 * Returns the value of the {@link #COMPRESSION_LEVEL} option from the
+	 * options map, or -1 if the map does not contain this key. The map itself
+	 * is not modified.
+	 *
+	 * @param o
+	 *            options map
+	 * @return The compression level if it exists in the map, or -1 instead.
+	 * @throws IllegalArgumentException
+	 *             if the {@link #COMPRESSION_LEVEL} option is present but its
+	 *             value is not an Integer (this includes {@code null}).
+	 * @since 5.11
+	 */
+	protected int getCompressionLevel(Map<String, Object> o) {
+		if (!o.containsKey(COMPRESSION_LEVEL)) {
+			return -1;
+		}
+		Object option = o.get(COMPRESSION_LEVEL);
+		// Test the type up front rather than catching ClassCastException; this
+		// also rejects a null value, which would otherwise surface as a
+		// NullPointerException when unboxed.
+		if (!(option instanceof Integer)) {
+			throw new IllegalArgumentException(MessageFormat.format(
+					ArchiveText.get().invalidCompressionLevel, option));
+		}
+		return ((Integer) option).intValue();
+	}
 }
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/Tbz2Format.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/Tbz2Format.java
index e880f5e..940dafd 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/Tbz2Format.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/Tbz2Format.java
@@ -45,7 +45,13 @@
 	@Override
 	public ArchiveOutputStream createArchiveOutputStream(OutputStream s,
 			Map<String, Object> o) throws IOException {
-		BZip2CompressorOutputStream out = new BZip2CompressorOutputStream(s);
+		BZip2CompressorOutputStream out;
+		int compressionLevel = getCompressionLevel(o);
+		if (compressionLevel != -1) {
+			out = new BZip2CompressorOutputStream(s, compressionLevel);
+		} else {
+			out = new BZip2CompressorOutputStream(s);
+		}
 		return tarFormat.createArchiveOutputStream(out, o);
 	}
 
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TgzFormat.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TgzFormat.java
index 859a59d..72e2439 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TgzFormat.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TgzFormat.java
@@ -18,6 +18,7 @@
 
 import org.apache.commons.compress.archivers.ArchiveOutputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.apache.commons.compress.compressors.gzip.GzipParameters;
 import org.eclipse.jgit.api.ArchiveCommand;
 import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
@@ -45,7 +46,15 @@
 	@Override
 	public ArchiveOutputStream createArchiveOutputStream(OutputStream s,
 			Map<String, Object> o) throws IOException {
-		GzipCompressorOutputStream out = new GzipCompressorOutputStream(s);
+		GzipCompressorOutputStream out;
+		int compressionLevel = getCompressionLevel(o);
+		if (compressionLevel != -1) {
+			GzipParameters parameters = new GzipParameters();
+			parameters.setCompressionLevel(compressionLevel);
+			out = new GzipCompressorOutputStream(s, parameters);
+		} else {
+			out = new GzipCompressorOutputStream(s);
+		}
 		return tarFormat.createArchiveOutputStream(out, o);
 	}
 
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TxzFormat.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TxzFormat.java
index 484ab57..b16fb6d 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TxzFormat.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/TxzFormat.java
@@ -45,7 +45,13 @@
 	@Override
 	public ArchiveOutputStream createArchiveOutputStream(OutputStream s,
 			Map<String, Object> o) throws IOException {
-		XZCompressorOutputStream out = new XZCompressorOutputStream(s);
+		XZCompressorOutputStream out;
+		int compressionLevel = getCompressionLevel(o);
+		if (compressionLevel != -1) {
+			out = new XZCompressorOutputStream(s, compressionLevel);
+		} else {
+			out = new XZCompressorOutputStream(s);
+		}
 		return tarFormat.createArchiveOutputStream(out, o);
 	}
 
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/ZipFormat.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/ZipFormat.java
index 59a9765..97a24c7 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/ZipFormat.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/ZipFormat.java
@@ -47,7 +47,12 @@
 	@Override
 	public ArchiveOutputStream createArchiveOutputStream(OutputStream s,
 			Map<String, Object> o) throws IOException {
-		return applyFormatOptions(new ZipArchiveOutputStream(s), o);
+		ZipArchiveOutputStream out = new ZipArchiveOutputStream(s);
+		int compressionLevel = getCompressionLevel(o);
+		if (compressionLevel != -1) {
+			out.setLevel(compressionLevel);
+		}
+		return applyFormatOptions(out, o);
 	}
 
 	/** {@inheritDoc} */
diff --git a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/internal/ArchiveText.java b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/internal/ArchiveText.java
index 45f96fa..551646b 100644
--- a/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/internal/ArchiveText.java
+++ b/org.eclipse.jgit.archive/src/org/eclipse/jgit/archive/internal/ArchiveText.java
@@ -28,6 +28,7 @@
 
 	// @formatter:off
 	/***/ public String cannotSetOption;
+	/***/ public String invalidCompressionLevel;
 	/***/ public String pathDoesNotMatchMode;
 	/***/ public String unsupportedMode;
 }
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ArchiveCommandTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ArchiveCommandTest.java
index 0f98a63..f2cceac 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ArchiveCommandTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/api/ArchiveCommandTest.java
@@ -12,6 +12,7 @@
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 import java.beans.Statement;
 import java.io.BufferedInputStream;
@@ -28,6 +29,7 @@
 import java.util.List;
 import java.util.Map;
 
+import java.util.Random;
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
@@ -55,6 +57,7 @@
 import org.eclipse.jgit.util.StringUtils;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 public class ArchiveCommandTest extends RepositoryTestCase {
@@ -184,9 +187,55 @@
 
 	@Test
 	public void archiveHeadAllFilesTarTimestamps() throws Exception {
+		archiveHeadAllFiles("tar");
+	}
+
+	@Test
+	public void archiveHeadAllFilesTgzTimestamps() throws Exception {
+		archiveHeadAllFiles("tgz");
+	}
+
+	@Test
+	public void archiveHeadAllFilesTbz2Timestamps() throws Exception {
+		archiveHeadAllFiles("tbz2");
+	}
+
+	@Test
+	public void archiveHeadAllFilesTxzTimestamps() throws Exception {
+		archiveHeadAllFiles("txz");
+	}
+
+	@Test
+	public void archiveHeadAllFilesZipTimestamps() throws Exception {
+		archiveHeadAllFiles("zip");
+	}
+
+	@Test
+	public void archiveHeadAllFilesTgzWithCompressionReducesArchiveSize() throws Exception {
+		archiveHeadAllFilesWithCompression("tgz");
+	}
+
+	@Test
+	public void archiveHeadAllFilesTbz2WithCompressionReducesArchiveSize() throws Exception {
+		archiveHeadAllFilesWithCompression("tbz2");
+	}
+
+	@Test
+	@Ignore
+	public void archiveHeadAllFilesTxzWithCompressionReducesArchiveSize() throws Exception {
+		// This test is ignored because the txz format consumes a lot of memory
+		// at high compression levels.
+		archiveHeadAllFilesWithCompression("txz");
+	}
+
+	@Test
+	public void archiveHeadAllFilesZipWithCompressionReducesArchiveSize() throws Exception {
+		archiveHeadAllFilesWithCompression("zip");
+	}
+
+	private void archiveHeadAllFiles(String fmt) throws Exception {
 		try (Git git = new Git(db)) {
 			createTestContent(git);
-			String fmt = "tar";
 			File archive = new File(getTemporaryDirectory(),
 					"archive." + format);
 			archive(git, archive, fmt);
@@ -194,7 +243,7 @@
 
 			try (InputStream fi = Files.newInputStream(archive.toPath());
 					InputStream bi = new BufferedInputStream(fi);
-					ArchiveInputStream o = new TarArchiveInputStream(bi)) {
+					ArchiveInputStream o = createArchiveInputStream(fmt, bi)) {
 				assertEntries(o);
 			}
 
@@ -205,97 +254,42 @@
 		}
 	}
 
-	@Test
-	public void archiveHeadAllFilesTgzTimestamps() throws Exception {
+	@SuppressWarnings({ "serial", "boxing" })
+	private void archiveHeadAllFilesWithCompression(String fmt) throws Exception {
 		try (Git git = new Git(db)) {
-			createTestContent(git);
-			String fmt = "tgz";
+			createLargeTestContent(git);
 			File archive = new File(getTemporaryDirectory(),
-					"archive." + fmt);
-			archive(git, archive, fmt);
-			ObjectId hash1 = ObjectId.fromRaw(IO.readFully(archive));
+					"archive." + format);
 
-			try (InputStream fi = Files.newInputStream(archive.toPath());
-					InputStream bi = new BufferedInputStream(fi);
-					InputStream gzi = new GzipCompressorInputStream(bi);
-					ArchiveInputStream o = new TarArchiveInputStream(gzi)) {
-				assertEntries(o);
-			}
+			archive(git, archive, fmt, new HashMap<String, Object>() {{
+				put("compression-level", 1);
+			}});
+			int sizeCompression1 = getNumBytes(archive);
 
-			Thread.sleep(WAIT);
-			archive(git, archive, fmt);
-			assertEquals(UNEXPECTED_DIFFERENT_HASH, hash1,
-					ObjectId.fromRaw(IO.readFully(archive)));
+			archive(git, archive, fmt, new HashMap<String, Object>() {{
+				put("compression-level", 9);
+			}});
+			int sizeCompression9 = getNumBytes(archive);
+
+			assertTrue(sizeCompression1 > sizeCompression9);
 		}
 	}
 
-	@Test
-	public void archiveHeadAllFilesTbz2Timestamps() throws Exception {
-		try (Git git = new Git(db)) {
-			createTestContent(git);
-			String fmt = "tbz2";
-			File archive = new File(getTemporaryDirectory(),
-					"archive." + fmt);
-			archive(git, archive, fmt);
-			ObjectId hash1 = ObjectId.fromRaw(IO.readFully(archive));
-
-			try (InputStream fi = Files.newInputStream(archive.toPath());
-					InputStream bi = new BufferedInputStream(fi);
-					InputStream gzi = new BZip2CompressorInputStream(bi);
-					ArchiveInputStream o = new TarArchiveInputStream(gzi)) {
-				assertEntries(o);
-			}
-
-			Thread.sleep(WAIT);
-			archive(git, archive, fmt);
-			assertEquals(UNEXPECTED_DIFFERENT_HASH, hash1,
-					ObjectId.fromRaw(IO.readFully(archive)));
+	private static ArchiveInputStream createArchiveInputStream (String fmt, InputStream bi)
+			throws IOException {
+		switch (fmt) {
+			case "tar":
+				return new TarArchiveInputStream(bi);
+			case "tgz":
+				return new TarArchiveInputStream(new GzipCompressorInputStream(bi));
+			case "tbz2":
+				return new TarArchiveInputStream(new BZip2CompressorInputStream(bi));
+			case "txz":
+				return new TarArchiveInputStream(new XZCompressorInputStream(bi));
+			case "zip":
+				return new ZipArchiveInputStream(new BufferedInputStream(bi));
 		}
-	}
-
-	@Test
-	public void archiveHeadAllFilesTxzTimestamps() throws Exception {
-		try (Git git = new Git(db)) {
-			createTestContent(git);
-			String fmt = "txz";
-			File archive = new File(getTemporaryDirectory(), "archive." + fmt);
-			archive(git, archive, fmt);
-			ObjectId hash1 = ObjectId.fromRaw(IO.readFully(archive));
-
-			try (InputStream fi = Files.newInputStream(archive.toPath());
-					InputStream bi = new BufferedInputStream(fi);
-					InputStream gzi = new XZCompressorInputStream(bi);
-					ArchiveInputStream o = new TarArchiveInputStream(gzi)) {
-				assertEntries(o);
-			}
-
-			Thread.sleep(WAIT);
-			archive(git, archive, fmt);
-			assertEquals(UNEXPECTED_DIFFERENT_HASH, hash1,
-					ObjectId.fromRaw(IO.readFully(archive)));
-		}
-	}
-
-	@Test
-	public void archiveHeadAllFilesZipTimestamps() throws Exception {
-		try (Git git = new Git(db)) {
-			createTestContent(git);
-			String fmt = "zip";
-			File archive = new File(getTemporaryDirectory(), "archive." + fmt);
-			archive(git, archive, fmt);
-			ObjectId hash1 = ObjectId.fromRaw(IO.readFully(archive));
-
-			try (InputStream fi = Files.newInputStream(archive.toPath());
-					InputStream bi = new BufferedInputStream(fi);
-					ArchiveInputStream o = new ZipArchiveInputStream(bi)) {
-				assertEntries(o);
-			}
-
-			Thread.sleep(WAIT);
-			archive(git, archive, fmt);
-			assertEquals(UNEXPECTED_DIFFERENT_HASH, hash1,
-					ObjectId.fromRaw(IO.readFully(archive)));
-		}
+		throw new IllegalArgumentException("Format " + fmt + " is not supported.");
 	}
 
 	private void createTestContent(Git git) throws IOException, GitAPIException,
@@ -312,13 +306,40 @@
 		git.commit().setMessage("updated file").call();
 	}
 
+	/** Commits "large_file.txt": 2000 lines, each holding 80 pseudo-random lowercase letters. */
+	private void createLargeTestContent(Git git) throws IOException, GitAPIException,
+			NoFilepatternException, NoHeadException, NoMessageException, UnmergedPathsException,
+			ConcurrentRefUpdateException, WrongRepositoryStateException, AbortedByHookException {
+		StringBuilder largeContent = new StringBuilder();
+		Random r = new Random(0); // fixed seed: identical content on every run
+		for (int i = 0; i < 2000; i++) {
+			for (int j = 0; j < 80; j++) {
+				largeContent.append((char)(r.nextInt(26) + 'a'));
+			}
+			largeContent.append("\n");
+		}
+		writeTrashFile("large_file.txt", largeContent.toString());
+		git.add().addFilepattern("large_file.txt").call();
+		git.commit().setMessage("create file").call();
+	}
+
 	private static void archive(Git git, File archive, String fmt)
 			throws GitAPIException,
 			FileNotFoundException, AmbiguousObjectException,
 			IncorrectObjectTypeException, IOException {
+		archive(git, archive, fmt, new HashMap<>());
+	}
+
+	private static void archive(Git git, File archive, String fmt, Map<String,
+			Object> options)
+			throws GitAPIException,
+			FileNotFoundException, AmbiguousObjectException,
+			IncorrectObjectTypeException, IOException {
 		git.archive().setOutputStream(new FileOutputStream(archive))
 				.setFormat(fmt)
-				.setTree(git.getRepository().resolve("HEAD")).call();
+				.setTree(git.getRepository().resolve("HEAD"))
+				.setFormatOptions(options)
+				.call();
 	}
 
 	private static void assertEntries(ArchiveInputStream o) throws IOException {
@@ -333,6 +354,13 @@
 		assertEquals(UNEXPECTED_ARCHIVE_SIZE, 2, n);
 	}
 
+	private static int getNumBytes(File archive) throws Exception {
+		// Ask the file system for the length: InputStream.available() is
+		// only an estimate of the bytes readable without blocking and is
+		// not guaranteed to equal the file size.
+		return Math.toIntExact(Files.size(archive.toPath()));
+	}
+
 	private static class MockFormat
 			implements ArchiveCommand.Format<MockOutputStream> {