Remove Tika dependency for uploadvalidator
Tika was originally used over the MimeUtil2 library
because it was claimed that project.config was detected
as application/octet-stream. While project.config is in
fact detected as text/x-ini, other .config files are
detected as application/octet-stream.
Since I35920f66a causes all .config files to be detected as text/x-ini,
we can remove the dependency on Tika introduced in Ia2529f187.
Change-Id: Ib9a64557b89058fc246a611c3e8a1be74eecf042
diff --git a/BUILD b/BUILD
index a5fa04e..dbfeb5a 100644
--- a/BUILD
+++ b/BUILD
@@ -12,7 +12,6 @@
resources = glob(["src/main/resources/**/*"]),
deps = [
"@commons_io//jar",
- "@tika_core//jar",
],
)
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl
deleted file mode 100644
index d8ca858..0000000
--- a/external_plugin_deps.bzl
+++ /dev/null
@@ -1,8 +0,0 @@
-load("//tools/bzl:maven_jar.bzl", "maven_jar")
-
-def external_plugin_deps():
- maven_jar(
- name = 'tika_core',
- artifact = 'org.apache.tika:tika-core:1.12',
- sha1 = '5ab95580d22fe1dee79cffbcd98bb509a32da09b',
- )
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
index 2fe6479..133a42d 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -22,14 +22,11 @@
import com.google.gerrit.extensions.api.projects.ProjectConfigEntryType;
import com.google.gerrit.server.config.PluginConfig;
import com.google.gerrit.server.config.ProjectConfigEntry;
+import com.google.gerrit.server.mime.FileTypeRegistry;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.name.Named;
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
import org.eclipse.jgit.lib.ObjectLoader;
import java.io.IOException;
@@ -70,24 +67,24 @@
}
private final LoadingCache<String, Pattern> patternCache;
- private final Tika tika = new Tika(TikaConfig.getDefaultConfig());
+ private final FileTypeRegistry mimeUtil;
@Inject
- ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) {
+ ContentTypeUtil(
+ @Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache, FileTypeRegistry mimeUtil) {
this.patternCache = patternCache;
+ this.mimeUtil = mimeUtil;
}
public boolean isBlacklistedBinaryContentType(ObjectLoader ol, String pathname, PluginConfig cfg)
throws IOException, ExecutionException {
- try (InputStream is = ol.openStream()) {
- return matchesAny(getContentType(is, pathname), getBinaryTypes(cfg));
- }
+ return matchesAny(getContentType(ol, pathname), getBinaryTypes(cfg));
}
- public String getContentType(InputStream is, String pathname) throws IOException {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, pathname);
- return tika.detect(TikaInputStream.get(is), metadata);
+ public String getContentType(ObjectLoader ol, String pathname) throws IOException {
+ try (InputStream is = ol.openStream()) {
+ return mimeUtil.getMimeType(pathname, is).toString();
+ }
}
@VisibleForTesting
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
index eb1aa77..281ec44 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidator.java
@@ -33,7 +33,6 @@
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
-import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
@@ -157,14 +156,12 @@
Map<String, ObjectId> content = CommitUtils.getChangedContent(repo, c, revWalk);
for (String path : content.keySet()) {
ObjectLoader ol = repo.open(content.get(path));
- try (ObjectStream os = ol.openStream()) {
- String contentType = contentTypeUtil.getContentType(os, path);
- if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
- || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
- messages.add(
- new CommitValidationMessage(
- "found blocked content type (" + contentType + ") in file: " + path, true));
- }
+ String contentType = contentTypeUtil.getContentType(ol, path);
+ if ((contentTypeUtil.matchesAny(contentType, blockedTypes) && !whitelist)
+ || (!contentTypeUtil.matchesAny(contentType, blockedTypes) && whitelist)) {
+ messages.add(
+ new CommitValidationMessage(
+ "found blocked content type (" + contentType + ") in file: " + path, true));
}
}
return messages;
diff --git a/src/main/resources/Documentation/build.md b/src/main/resources/Documentation/build.md
index 81182de..e08cffa 100644
--- a/src/main/resources/Documentation/build.md
+++ b/src/main/resources/Documentation/build.md
@@ -14,15 +14,6 @@
ln -s ../../@PLUGIN@ .
```
-Put the external dependency Bazel build file into the Gerrit /plugins
-directory, replacing the existing empty one.
-
-```
- cd gerrit/plugins
- rm external_plugin_deps.bzl
- ln -s @PLUGIN@/external_plugin_deps.bzl .
-```
-
From Gerrit source tree issue the command:
```
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index c85039c..18b79ee 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -79,7 +79,7 @@
If there is a NUL byte in the first 8k then the file will be considered
binary regardless of this setting.
- To detect content types [Apache Tika library][2] is used.
+ To detect content types, the [MimeUtil2 library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
@@ -124,8 +124,8 @@
This check does not run on [binary files][4]
[1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
-[2]: https://tika.apache.org/
-[3]: https://tika.apache.org/1.12/formats.html#Full_list_of_Supported_Formats
+[2]: https://mvnrepository.com/artifact/eu.medsea.mimeutil/mime-util
+[3]: https://gerrit.googlesource.com/gerrit/+/refs/heads/master/gerrit-server/src/main/resources/com/google/gerrit/server/mime/mime-types.properties
[4]: #binary_type
plugin.@PLUGIN@.blockedContentType
@@ -134,7 +134,7 @@
This check looks for blocked content types. If the check finds a
blocked content type the push will be rejected.
- To detect content types [Apache Tika library][2] is used.
+ To detect content types, the [MimeUtil2 library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
index 6a178f0..a5b0c32 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/BlockedKeywordValidatorTest.java
@@ -78,7 +78,12 @@
RevCommit c = makeCommit(rw);
BlockedKeywordValidator validator =
new BlockedKeywordValidator(
- null, new ContentTypeUtil(PATTERN_CACHE), PATTERN_CACHE, null, null, null);
+ null,
+ new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
+ PATTERN_CACHE,
+ null,
+ null,
+ null);
List<CommitValidationMessage> m =
validator.performValidation(repo, c, rw, getPatterns().values(), EMPTY_PLUGIN_CONFIG);
Set<String> expected =
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
index 5e232e4..31025b6 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtilTest.java
@@ -18,6 +18,9 @@
import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.EMPTY_PLUGIN_CONFIG;
import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.PATTERN_CACHE;
+import com.google.gerrit.server.mime.MimeUtilFileTypeRegistry;
+import com.google.inject.Inject;
+
import org.junit.Before;
import org.junit.Test;
@@ -25,10 +28,11 @@
public class ContentTypeUtilTest {
private ContentTypeUtil ctu;
+ @Inject private MimeUtilFileTypeRegistry mimeUtil;
@Before
public void setUp() {
- ctu = new ContentTypeUtil(PATTERN_CACHE);
+ ctu = new ContentTypeUtil(PATTERN_CACHE, mimeUtil);
}
@Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
index 5d6dfad..092c251 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeValidatorTest.java
@@ -62,7 +62,12 @@
@Before
public void setUp() {
validator =
- new ContentTypeValidator(null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
+ new ContentTypeValidator(
+ null,
+ new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
+ null,
+ null,
+ null);
}
@Test
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/FakeMimeUtilFileTypeRegistry.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/FakeMimeUtilFileTypeRegistry.java
new file mode 100644
index 0000000..f2e275b
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/FakeMimeUtilFileTypeRegistry.java
@@ -0,0 +1,59 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.plugins.uploadvalidator;
+
+import com.google.gerrit.server.mime.FileTypeRegistry;
+import com.google.inject.Singleton;
+
+import java.io.InputStream;
+
+import eu.medsea.mimeutil.MimeType;
+
+@Singleton
+class FakeMimeUtilFileTypeRegistry implements FileTypeRegistry {
+ // Fake registry: the MIME type is chosen from the path suffix alone; the content argument is never read.
+ @Override
+ public MimeType getMimeType(String path, byte[] content) {
+ if (path.endsWith(".pdf")) {
+ return new MimeType("application/pdf");
+ }
+ if (path.endsWith(".xml")) {
+ return new MimeType("application/xml");
+ }
+ if (path.endsWith(".html")) {
+ return new MimeType("text/html");
+ }
+ return new MimeType("application/octet-stream");
+ }
+ // Same suffix-to-type mapping as the byte[] overload; the stream is neither read nor closed here.
+ @Override
+ public MimeType getMimeType(String path, InputStream is) {
+ if (path.endsWith(".pdf")) {
+ return new MimeType("application/pdf");
+ }
+ if (path.endsWith(".xml")) {
+ return new MimeType("application/xml");
+ }
+ if (path.endsWith(".html")) {
+ return new MimeType("text/html");
+ }
+ return new MimeType("application/octet-stream");
+ }
+ // Always answers false, whatever type is passed in.
+ @Override
+ public boolean isSafeInline(MimeType type) {
+ return false;
+ }
+}
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
index bd1d87c..31c144d 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/InvalidLineEndingValidatorTest.java
@@ -19,6 +19,8 @@
import static com.googlesource.gerrit.plugins.uploadvalidator.TestUtils.PATTERN_CACHE;
import com.google.gerrit.server.git.validators.CommitValidationMessage;
+import com.google.gerrit.server.mime.MimeUtilFileTypeRegistry;
+import com.google.inject.Inject;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.revwalk.RevCommit;
@@ -55,7 +57,11 @@
RevCommit c = makeCommit(rw);
InvalidLineEndingValidator validator =
new InvalidLineEndingValidator(
- null, new ContentTypeUtil(PATTERN_CACHE), null, null, null);
+ null,
+ new ContentTypeUtil(PATTERN_CACHE, new FakeMimeUtilFileTypeRegistry()),
+ null,
+ null,
+ null);
List<CommitValidationMessage> m =
validator.performValidation(repo, c, rw, EMPTY_PLUGIN_CONFIG);
assertThat(TestUtils.transformMessages(m))