Merge branch 'stable-2.16' into stable-3.0

* stable-2.16:
  Add missing newline at end of tools/BUILD
  Format Java files with google-java-format
  Update external dependencies
  Revert "Remove Tika dependency for uploadvalidator"
  Add test for mime type detection

Change-Id: Ie57e47d1737b1bd2f367c7f62c32591456e5071a
diff --git a/BUILD b/BUILD
index 4ae3ed0..d73e553 100644
--- a/BUILD
+++ b/BUILD
@@ -11,12 +11,16 @@
         "Gerrit-Module: com.googlesource.gerrit.plugins.uploadvalidator.Module",
     ],
     resources = glob(["src/main/resources/**/*"]),
+    deps = [
+        "@tika-core//jar",
+    ],
 )
 
 TEST_SRCS = "src/test/java/**/*Test.java"
 
 TEST_DEPS = PLUGIN_DEPS + PLUGIN_TEST_DEPS + [
-    "@commons_io//jar",
+    "@commons-io//jar",
+    "@tika-core//jar",
     ":uploadvalidator__plugin",
 ]
 
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl
index 93746e1..8d85336 100644
--- a/external_plugin_deps.bzl
+++ b/external_plugin_deps.bzl
@@ -2,7 +2,12 @@
 
 def external_plugin_deps():
     maven_jar(
-        name = "commons_io",
-        artifact = "commons-io:commons-io:1.4",
-        sha1 = "a8762d07e76cfde2395257a5da47ba7c1dbd3dce",
+        name = "commons-io",
+        artifact = "commons-io:commons-io:2.2",
+        sha1 = "83b5b8a7ba1c08f9e8c8ff2373724e33d3c1e22a",
+    )
+    maven_jar(
+        name = "tika-core",
+        artifact = "org.apache.tika:tika-core:1.24.1",
+        sha1 = "703e65fb300d1425d4ad7b68c21c7795bb7a95c3",
     )
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
index ddb18db..67ae289 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -22,7 +22,6 @@
 import com.google.gerrit.extensions.api.projects.ProjectConfigEntryType;
 import com.google.gerrit.server.config.PluginConfig;
 import com.google.gerrit.server.config.ProjectConfigEntry;
-import com.google.gerrit.server.mime.FileTypeRegistry;
 import com.google.inject.AbstractModule;
 import com.google.inject.Inject;
 import com.google.inject.name.Named;
@@ -30,6 +29,10 @@
 import java.io.InputStream;
 import java.util.concurrent.ExecutionException;
 import java.util.regex.Pattern;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
 import org.eclipse.jgit.lib.ObjectLoader;
 
 public class ContentTypeUtil {
@@ -65,24 +68,24 @@
   }
 
   private final LoadingCache<String, Pattern> patternCache;
-  private final FileTypeRegistry mimeUtil;
+  private final Tika tika = new Tika(TikaConfig.getDefaultConfig());
 
   @Inject
-  ContentTypeUtil(
-      @Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache, FileTypeRegistry mimeUtil) {
+  ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) {
     this.patternCache = patternCache;
-    this.mimeUtil = mimeUtil;
   }
 
   public boolean isBlacklistedBinaryContentType(ObjectLoader ol, String pathname, PluginConfig cfg)
       throws IOException, ExecutionException {
-    return matchesAny(getContentType(ol, pathname), getBinaryTypes(cfg));
+    try (InputStream is = ol.openStream()) {
+      return matchesAny(getContentType(is, pathname), getBinaryTypes(cfg));
+    }
   }
 
-  public String getContentType(ObjectLoader ol, String pathname) throws IOException {
-    try (InputStream is = ol.openStream()) {
-      return mimeUtil.getMimeType(pathname, is).toString();
-    }
+  public String getContentType(InputStream is, String pathname) throws IOException {
+    Metadata metadata = new Metadata();
+    metadata.set(Metadata.RESOURCE_NAME_KEY, pathname);
+    return tika.detect(TikaInputStream.get(is), metadata);
   }
 
   @VisibleForTesting
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
index 1cc5fd4..0d8f2e4 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
@@ -38,6 +38,7 @@
 import java.util.concurrent.ExecutionException;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectStream;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevWalk;
@@ -153,13 +154,15 @@
     List<CommitValidationMessage> messages = new LinkedList<>();
     Map<String, ObjectId> content = CommitUtils.getChangedContent(repo, c, revWalk);
     for (String path : content.keySet()) {
-      ObjectLoader ol = revWalk.getObjectReader().open(content.get(path));
-      String contentType = contentTypeUtil.getContentType(ol, path);
-      if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
-          || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
-        messages.add(
-            new CommitValidationMessage(
-                "found blocked content type (" + contentType + ") in file: " + path, true));
+      ObjectLoader ol = repo.open(content.get(path));
+      try (ObjectStream os = ol.openStream()) {
+        String contentType = contentTypeUtil.getContentType(os, path);
+        if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
+            || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
+          messages.add(
+              new CommitValidationMessage(
+                  "found blocked content type (" + contentType + ") in file: " + path, true));
+        }
       }
     }
     return messages;
diff --git a/src/main/resources/Documentation/build.md b/src/main/resources/Documentation/build.md
index 3aafe70..c41f14e 100644
--- a/src/main/resources/Documentation/build.md
+++ b/src/main/resources/Documentation/build.md
@@ -60,6 +60,15 @@
   ln -s ../../@PLUGIN@ .
 ```
 
+Link the plugin's external dependency Bazel file (`external_plugin_deps.bzl`)
+into the Gerrit `plugins` directory, replacing the existing empty one.
+
+```
+  cd gerrit/plugins
+  rm external_plugin_deps.bzl
+  ln -s @PLUGIN@/external_plugin_deps.bzl .
+```
+
 From Gerrit source tree issue the command:
 
 ```
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index e1cf169..5a2e50c 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -109,7 +109,7 @@
     If there is a NUL byte in the first 8k then the file will be considered
     binary regardless of this setting.
 
-    To detect content types, the [MimeUtil2 library][2] is used.
+    To detect content types, the [Apache Tika library][2] is used.
 
     Content type can be specified as a string, wildcard or a regular expression,
     for example:
@@ -154,8 +154,8 @@
     This check does not run on [binary files][4]
 
 [1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
-[2]: https://mvnrepository.com/artifact/eu.medsea.mimeutil/mime-util
-[3]: https://gerrit.googlesource.com/gerrit/+/refs/heads/master/gerrit-server/src/main/resources/com/google/gerrit/server/mime/mime-types.properties
+[2]: https://tika.apache.org/
+[3]: https://tika.apache.org/1.24.1/formats.html#Full_list_of_Supported_Formats
 [4]: #binary_type
 
 plugin.@PLUGIN@.blockedContentType
@@ -164,7 +164,7 @@
     This check looks for blocked content types. If the check finds a
     blocked content type the push will be rejected.
 
-    To detect content types the [MimeUtil2 library][2] is used.
+    To detect content types, the [Apache Tika library][2] is used.
 
     Content type can be specified as a string, wildcard or a regular expression,
     for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
index 20f2e83..a22baa0 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
@@ -76,12 +76,7 @@
       RevCommit c = makeCommit(rw);
       BlockedKeywordValidator validator =
           new BlockedKeywordValidator(
-              null,
-              new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
-              PATTERN_CACHE,
-              null,
-              null,
-              null);
+              null, new ContentTypeUtil(PATTERN_CACHE), PATTERN_CACHE, null, null, null);
       List<CommitValidationMessage> m =
           validator.performValidation(repo, c, rw, getPatterns().values(), EMPTY_PLUGIN_CONFIG);
       Set<String> expected =
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
index a8e2713..91df4c6 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
@@ -18,19 +18,16 @@
 import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.EMPTY_PLUGIN_CONFIG;
 import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.PATTERN_CACHE;
 
-import com.google.gerrit.server.mime.MimeUtilFileTypeRegistry;
-import com.google.inject.Inject;
 import java.util.concurrent.ExecutionException;
 import org.junit.Before;
 import org.junit.Test;
 
 public class ContentTypeUtilTest {
   private ContentTypeUtil ctu;
-  @Inject private MimeUtilFileTypeRegistry mimeUtil;
 
   @Before
   public void setUp() {
-    ctu = new ContentTypeUtil(PATTERN_CACHE, mimeUtil);
+    ctu = new ContentTypeUtil(PATTERN_CACHE);
   }
 
   @Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
index b57ef49..6477189 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
@@ -60,12 +60,7 @@
   @Before
   public void setUp() {
     validator =
-        new ContentTypeValidator(
-            null,
-            new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
-            null,
-            null,
-            null);
+        new ContentTypeValidator(null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
   }
 
   @Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
index 17209d2..e0b2b68 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
@@ -53,11 +53,7 @@
       RevCommit c = makeCommit(rw);
       InvalidLineEndingValidator validator =
           new InvalidLineEndingValidator(
-              null,
-              new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
-              null,
-              null,
-              null);
+              null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
       List<CommitValidationMessage> m =
           validator.performValidation(repo, c, rw, EMPTY_PLUGIN_CONFIG);
       assertThat(TestUtils.transformMessages(m))
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
new file mode 100644
index 0000000..ac38404
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.uploadvalidator;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+class MimeTypeDetection {
+  public String getMimeType(String path, byte[] content) throws IOException {
+    Tika tika = new Tika(TikaConfig.getDefaultConfig());
+
+    Metadata metadata = new Metadata();
+    metadata.set(Metadata.RESOURCE_NAME_KEY, path);
+
+    ByteArrayInputStream bis = new ByteArrayInputStream(content);
+    return tika.detect(TikaInputStream.get(bis), metadata);
+  }
+}
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java
new file mode 100644
index 0000000..8c1c920
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java
@@ -0,0 +1,95 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.uploadvalidator;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypeDetectionTest extends ValidatorTestCase {
+  private class FileContent {
+    public FileContent(String fileName, byte[] content, String contentType) {
+      this.fileName = fileName;
+      this.content = content;
+      this.contentType = contentType;
+    }
+
+    public String fileName;
+    public byte[] content;
+    public String contentType;
+  }
+
+  private static final byte[] TEST_PDF =
+      ("%PDF-1.4\n"
+              + "1 0 obj << /Type /Catalog /Outlines 2 0 R /Pages 3 0 R >>\n"
+              + "endobj 2 0 obj << /Type /Outlines /Count 0 >>\n"
+              + "endobj 3 0 obj << /Type /Pages /Kids [4 0 R] /Count 1\n"
+              + ">> endobj 4 0 obj << /Type /Page /Parent 3 0 R\n"
+              + "/MediaBox [0 0 612 144] /Contents 5 0 R /Resources << /ProcSet 6 0 R\n"
+              + "/Font << /F1 7 0 R >> >> >> endobj 5 0 obj\n"
+              + "<< /Length 73 >> stream BT\n"
+              + "/F1 24 Tf\n"
+              + "100 100 Td\n"
+              + "(Small pdf) Tj\n"
+              + "ET endstream endobj 6 0 obj [/PDF /Text] endobj 7 0 obj\n"
+              + "<< /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica\n"
+              + "/Encoding /MacRomanEncoding >> endobj xref 0 8\n"
+              + "0000000000 65535 f 0000000009 00000 n 0000000074 00000 n\n"
+              + "0000000120 00000 n 0000000179 00000 n 0000000364 00000 n\n"
+              + "0000000466 00000 n 0000000496 00000 n\n"
+              + "trailer << /Size 8 /Root 1 0 R >> startxref 625\n"
+              + "%%EOF")
+          .getBytes(StandardCharsets.UTF_8);
+
+  private MimeTypeDetection detection;
+
+  @Before
+  public void setUp() {
+    detection = new MimeTypeDetection();
+  }
+
+  @Test
+  public void testMimeTypeDetection() throws Exception {
+    List<FileContent> files = createFiles();
+    for (FileContent file : files) {
+      assertThat(detection.getMimeType(file.fileName, file.content).toString())
+          .isEqualTo(file.contentType);
+    }
+  }
+
+  private List<FileContent> createFiles() {
+    List<FileContent> files = new ArrayList<>();
+
+    String content = "<?xml version=\"1.0\"?><a><b>c</b></a>";
+    files.add(
+        new FileContent("foo.xml", content.getBytes(StandardCharsets.UTF_8), "application/xml"));
+
+    content = "<html><body><h1>Hello World!</h1></body></html>";
+    files.add(new FileContent("foo.html", content.getBytes(StandardCharsets.UTF_8), "text/html"));
+
+    content = "Hello,World";
+    files.add(new FileContent("foo.csv", content.getBytes(StandardCharsets.UTF_8), "text/csv"));
+
+    content = "hello=world";
+    files.add(new FileContent("foo", content.getBytes(StandardCharsets.UTF_8), "text/plain"));
+
+    files.add(new FileContent("foo.pdf", TEST_PDF, "application/pdf"));
+    return files;
+  }
+}
diff --git a/tools/BUILD b/tools/BUILD
index 1fa2160..cc10083 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -1 +1 @@
-# Empty file - bazel treat directories with BUILD file as a package
\ No newline at end of file
+# Empty file - Bazel treats a directory containing a BUILD file as a package