Merge branch 'stable-2.16' into stable-3.0
* stable-2.16:
Add missing newline at end of tools/BUILD
Format Java files with google-java-format
Update external dependencies
Revert "Remove Tika dependency for uploadvalidator"
Add test for mime type detection
Change-Id: Ie57e47d1737b1bd2f367c7f62c32591456e5071a
diff --git a/BUILD b/BUILD
index 4ae3ed0..d73e553 100644
--- a/BUILD
+++ b/BUILD
@@ -11,12 +11,16 @@
"Gerrit-Module: com.googlesource.gerrit.plugins.uploadvalidator.Module",
],
resources = glob(["src/main/resources/**/*"]),
+ deps = [
+ "@tika-core//jar",
+ ],
)
TEST_SRCS = "src/test/java/**/*Test.java"
TEST_DEPS = PLUGIN_DEPS + PLUGIN_TEST_DEPS + [
- "@commons_io//jar",
+ "@commons-io//jar",
+ "@tika-core//jar",
":uploadvalidator__plugin",
]
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl
index 93746e1..8d85336 100644
--- a/external_plugin_deps.bzl
+++ b/external_plugin_deps.bzl
@@ -2,7 +2,12 @@
def external_plugin_deps():
maven_jar(
- name = "commons_io",
- artifact = "commons-io:commons-io:1.4",
- sha1 = "a8762d07e76cfde2395257a5da47ba7c1dbd3dce",
+ name = "commons-io",
+ artifact = "commons-io:commons-io:2.2",
+ sha1 = "83b5b8a7ba1c08f9e8c8ff2373724e33d3c1e22a",
+ )
+ maven_jar(
+ name = "tika-core",
+ artifact = "org.apache.tika:tika-core:1.24.1",
+ sha1 = "703e65fb300d1425d4ad7b68c21c7795bb7a95c3",
)
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
index ddb18db..67ae289 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -22,7 +22,6 @@
import com.google.gerrit.extensions.api.projects.ProjectConfigEntryType;
import com.google.gerrit.server.config.PluginConfig;
import com.google.gerrit.server.config.ProjectConfigEntry;
-import com.google.gerrit.server.mime.FileTypeRegistry;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.name.Named;
@@ -30,6 +29,10 @@
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
import org.eclipse.jgit.lib.ObjectLoader;
public class ContentTypeUtil {
@@ -65,24 +68,24 @@
}
private final LoadingCache<String, Pattern> patternCache;
- private final FileTypeRegistry mimeUtil;
+ private final Tika tika = new Tika(TikaConfig.getDefaultConfig());
@Inject
- ContentTypeUtil(
- @Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache, FileTypeRegistry mimeUtil) {
+ ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) {
this.patternCache = patternCache;
- this.mimeUtil = mimeUtil;
}
public boolean isBlacklistedBinaryContentType(ObjectLoader ol, String pathname, PluginConfig cfg)
throws IOException, ExecutionException {
- return matchesAny(getContentType(ol, pathname), getBinaryTypes(cfg));
+ try (InputStream is = ol.openStream()) {
+ return matchesAny(getContentType(is, pathname), getBinaryTypes(cfg));
+ }
}
- public String getContentType(ObjectLoader ol, String pathname) throws IOException {
- try (InputStream is = ol.openStream()) {
- return mimeUtil.getMimeType(pathname, is).toString();
- }
+ public String getContentType(InputStream is, String pathname) throws IOException {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, pathname);
+ return tika.detect(TikaInputStream.get(is), metadata);
}
@VisibleForTesting
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
index 1cc5fd4..0d8f2e4 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
@@ -38,6 +38,7 @@
import java.util.concurrent.ExecutionException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
@@ -153,13 +154,15 @@
List<CommitValidationMessage> messages = new LinkedList<>();
Map<String, ObjectId> content = CommitUtils.getChangedContent(repo, c, revWalk);
for (String path : content.keySet()) {
- ObjectLoader ol = revWalk.getObjectReader().open(content.get(path));
- String contentType = contentTypeUtil.getContentType(ol, path);
- if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
- || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
- messages.add(
- new CommitValidationMessage(
- "found blocked content type (" + contentType + ") in file: " + path, true));
+ ObjectLoader ol = repo.open(content.get(path));
+ try (ObjectStream os = ol.openStream()) {
+ String contentType = contentTypeUtil.getContentType(os, path);
+ if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
+ || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
+ messages.add(
+ new CommitValidationMessage(
+ "found blocked content type (" + contentType + ") in file: " + path, true));
+ }
}
}
return messages;
diff --git a/src/main/resources/Documentation/build.md b/src/main/resources/Documentation/build.md
index 3aafe70..c41f14e 100644
--- a/src/main/resources/Documentation/build.md
+++ b/src/main/resources/Documentation/build.md
@@ -60,6 +60,15 @@
ln -s ../../@PLUGIN@ .
```
+Put the external dependency Bazel build file into the Gerrit /plugins
+directory, replacing the existing empty one.
+
+```
+ cd gerrit/plugins
+ rm external_plugin_deps.bzl
+ ln -s @PLUGIN@/external_plugin_deps.bzl .
+```
+
From Gerrit source tree issue the command:
```
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index e1cf169..5a2e50c 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -109,7 +109,7 @@
If there is a NUL byte in the first 8k then the file will be considered
binary regardless of this setting.
- To detect content types, the [MimeUtil2 library][2] is used.
+ To detect content types, the [Apache Tika library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
@@ -154,8 +154,8 @@
This check does not run on [binary files][4]
[1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
-[2]: https://mvnrepository.com/artifact/eu.medsea.mimeutil/mime-util
-[3]: https://gerrit.googlesource.com/gerrit/+/refs/heads/master/gerrit-server/src/main/resources/com/google/gerrit/server/mime/mime-types.properties
+[2]: https://tika.apache.org/
+[3]: https://tika.apache.org/1.24.1/formats.html#Full_list_of_Supported_Formats
[4]: #binary_type
plugin.@PLUGIN@.blockedContentType
@@ -164,7 +164,7 @@
This check looks for blocked content types. If the check finds a
blocked content type the push will be rejected.
- To detect content types the [MimeUtil2 library][2] is used.
+ To detect content types, the [Apache Tika library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
index 20f2e83..a22baa0 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
@@ -76,12 +76,7 @@
RevCommit c = makeCommit(rw);
BlockedKeywordValidator validator =
new BlockedKeywordValidator(
- null,
- new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
- PATTERN_CACHE,
- null,
- null,
- null);
+ null, new ContentTypeUtil(PATTERN_CACHE), PATTERN_CACHE, null, null, null);
List<CommitValidationMessage> m =
validator.performValidation(repo, c, rw, getPatterns().values(), EMPTY_PLUGIN_CONFIG);
Set<String> expected =
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
index a8e2713..91df4c6 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
@@ -18,19 +18,16 @@
import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.EMPTY_PLUGIN_CONFIG;
import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.PATTERN_CACHE;
-import com.google.gerrit.server.mime.MimeUtilFileTypeRegistry;
-import com.google.inject.Inject;
import java.util.concurrent.ExecutionException;
import org.junit.Before;
import org.junit.Test;
public class ContentTypeUtilTest {
private ContentTypeUtil ctu;
- @Inject private MimeUtilFileTypeRegistry mimeUtil;
@Before
public void setUp() {
- ctu = new ContentTypeUtil(PATTERN_CACHE, mimeUtil);
+ ctu = new ContentTypeUtil(PATTERN_CACHE);
}
@Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
index b57ef49..6477189 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
@@ -60,12 +60,7 @@
@Before
public void setUp() {
validator =
- new ContentTypeValidator(
- null,
- new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
- null,
- null,
- null);
+ new ContentTypeValidator(null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
}
@Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
index 17209d2..e0b2b68 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
@@ -53,11 +53,7 @@
RevCommit c = makeCommit(rw);
InvalidLineEndingValidator validator =
new InvalidLineEndingValidator(
- null,
- new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
- null,
- null,
- null);
+ null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
List<CommitValidationMessage> m =
validator.performValidation(repo, c, rw, EMPTY_PLUGIN_CONFIG);
assertThat(TestUtils.transformMessages(m))
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
new file mode 100644
index 0000000..ac38404
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.uploadvalidator;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+class MimeTypeDetection {
+ public String getMimeType(String path, byte[] content) throws IOException {
+ Tika tika = new Tika(TikaConfig.getDefaultConfig());
+
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, path);
+
+ ByteArrayInputStream bis = new ByteArrayInputStream(content);
+ return tika.detect(TikaInputStream.get(bis), metadata);
+ }
+}
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java
new file mode 100644
index 0000000..8c1c920
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetectionTest.java
@@ -0,0 +1,95 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.uploadvalidator;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MimeTypeDetectionTest extends ValidatorTestCase {
+ private class FileContent {
+ public FileContent(String fileName, byte[] content, String contentType) {
+ this.fileName = fileName;
+ this.content = content;
+ this.contentType = contentType;
+ }
+
+ public String fileName;
+ public byte[] content;
+ public String contentType;
+ }
+
+ private static final byte[] TEST_PDF =
+ ("%PDF-1.4\n"
+ + "1 0 obj << /Type /Catalog /Outlines 2 0 R /Pages 3 0 R >>\n"
+ + "endobj 2 0 obj << /Type /Outlines /Count 0 >>\n"
+ + "endobj 3 0 obj << /Type /Pages /Kids [4 0 R] /Count 1\n"
+ + ">> endobj 4 0 obj << /Type /Page /Parent 3 0 R\n"
+ + "/MediaBox [0 0 612 144] /Contents 5 0 R /Resources << /ProcSet 6 0 R\n"
+ + "/Font << /F1 7 0 R >> >> >> endobj 5 0 obj\n"
+ + "<< /Length 73 >> stream BT\n"
+ + "/F1 24 Tf\n"
+ + "100 100 Td\n"
+ + "(Small pdf) Tj\n"
+ + "ET endstream endobj 6 0 obj [/PDF /Text] endobj 7 0 obj\n"
+ + "<< /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica\n"
+ + "/Encoding /MacRomanEncoding >> endobj xref 0 8\n"
+ + "0000000000 65535 f 0000000009 00000 n 0000000074 00000 n\n"
+ + "0000000120 00000 n 0000000179 00000 n 0000000364 00000 n\n"
+ + "0000000466 00000 n 0000000496 00000 n\n"
+ + "trailer << /Size 8 /Root 1 0 R >> startxref 625\n"
+ + "%%EOF")
+ .getBytes(StandardCharsets.UTF_8);
+
+ private MimeTypeDetection detection;
+
+ @Before
+ public void setUp() {
+ detection = new MimeTypeDetection();
+ }
+
+ @Test
+ public void testMimeTypeDetection() throws Exception {
+ List<FileContent> files = createFiles();
+ for (FileContent file : files) {
+ assertThat(detection.getMimeType(file.fileName, file.content).toString())
+ .isEqualTo(file.contentType);
+ }
+ }
+
+ private List<FileContent> createFiles() {
+ List<FileContent> files = new ArrayList<>();
+
+ String content = "<?xml version=\"1.0\"?><a><b>c</b></a>";
+ files.add(
+ new FileContent("foo.xml", content.getBytes(StandardCharsets.UTF_8), "application/xml"));
+
+ content = "<html><body><h1>Hello World!</h1></body></html>";
+ files.add(new FileContent("foo.html", content.getBytes(StandardCharsets.UTF_8), "text/html"));
+
+ content = "Hello,World";
+ files.add(new FileContent("foo.csv", content.getBytes(StandardCharsets.UTF_8), "text/csv"));
+
+ content = "hello=world";
+ files.add(new FileContent("foo", content.getBytes(StandardCharsets.UTF_8), "text/plain"));
+
+ files.add(new FileContent("foo.pdf", TEST_PDF, "application/pdf"));
+ return files;
+ }
+}
diff --git a/tools/BUILD b/tools/BUILD
index 1fa2160..cc10083 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -1 +1 @@
-# Empty file - bazel treat directories with BUILD file as a package
\ No newline at end of file
+# Empty file - bazel treat directories with BUILD file as a package