Merge branch 'stable-3.0' into stable-3.1
* stable-3.0:
Replace tika by overviewproject mime-types to detect mime types
Upgrade bazlets to latest stable-3.0 to build with 3.0.13 API
Avoid any uses or lookups of ffmpeg by Tika
Change-Id: I5623b878e7f8bcdec44f768a91487463a86d3a16
diff --git a/BUILD b/BUILD
index d73e553..d74886f 100644
--- a/BUILD
+++ b/BUILD
@@ -12,7 +12,7 @@
],
resources = glob(["src/main/resources/**/*"]),
deps = [
- "@tika-core//jar",
+ "@mime-types//jar",
],
)
@@ -20,7 +20,7 @@
TEST_DEPS = PLUGIN_DEPS + PLUGIN_TEST_DEPS + [
"@commons-io//jar",
- "@tika-core//jar",
+ "@mime-types//jar",
":uploadvalidator__plugin",
]
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl
index 8d85336..6ef4aa8 100644
--- a/external_plugin_deps.bzl
+++ b/external_plugin_deps.bzl
@@ -7,7 +7,7 @@
sha1 = "83b5b8a7ba1c08f9e8c8ff2373724e33d3c1e22a",
)
maven_jar(
- name = "tika-core",
- artifact = "org.apache.tika:tika-core:1.24.1",
- sha1 = "703e65fb300d1425d4ad7b68c21c7795bb7a95c3",
- )
+ name = "mime-types",
+ artifact = "org.overviewproject:mime-types:0.1.3",
+ sha1 = "63ebd860cdad2f8a5fec89ae3238970607d943a3",
+ )
\ No newline at end of file
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
index 67ae289..bb6f9db 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -29,11 +29,9 @@
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
import org.eclipse.jgit.lib.ObjectLoader;
+import org.overviewproject.mime_types.GetBytesException;
+import org.overviewproject.mime_types.MimeTypeDetector;
public class ContentTypeUtil {
private static final String KEY_BINARY_TYPES = "binaryTypes";
@@ -68,7 +66,7 @@
}
private final LoadingCache<String, Pattern> patternCache;
- private final Tika tika = new Tika(TikaConfig.getDefaultConfig());
+ private final MimeTypeDetector detector = new MimeTypeDetector();
@Inject
ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) {
@@ -83,9 +81,11 @@
}
public String getContentType(InputStream is, String pathname) throws IOException {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, pathname);
- return tika.detect(TikaInputStream.get(is), metadata);
+ try {
+ return detector.detectMimeType(pathname, is);
+ } catch (GetBytesException e) {
+ throw new IOException(e);
+ }
}
@VisibleForTesting
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index 5a2e50c..86f2015 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -109,7 +109,7 @@
If there is a NUL byte in the first 8k then the file will be considered
binary regardless of this setting.
- To detect content types [Apache Tika library][2] is used.
+ To detect content types the [overviewproject mime-types library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
@@ -154,8 +154,9 @@
This check does not run on [binary files][4]
[1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
-[2]: https://tika.apache.org/
-[3]: https://tika.apache.org/1.12/formats.html#Full_list_of_Supported_Formats
+[2]: https://github.com/overview/mime-types
+ "overviewproject mime-types, a successor of the original project http://sourceforge.net/p/mime-util/ which seems to be unmaintained"
+[3]: https://gitlab.freedesktop.org/xdg/shared-mime-info "mime-types supports the MIME types supported by shared-mime-info"
[4]: #binary_type
plugin.@PLUGIN@.blockedContentType
@@ -164,7 +165,7 @@
This check looks for blocked content types. If the check finds a
blocked content type the push will be rejected.
- To detect content types [Apache Tika library][2] is used.
+ To detect content types the [overviewproject mime-types library][2] is used.
Content type can be specified as a string, wildcard or a regular expression,
for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
index ac38404..040ae6c 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
@@ -16,19 +16,17 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
+import org.overviewproject.mime_types.GetBytesException;
+import org.overviewproject.mime_types.MimeTypeDetector;
class MimeTypeDetection {
public String getMimeType(String path, byte[] content) throws IOException {
- Tika tika = new Tika(TikaConfig.getDefaultConfig());
-
- Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, path);
-
+ MimeTypeDetector detector = new MimeTypeDetector();
ByteArrayInputStream bis = new ByteArrayInputStream(content);
- return tika.detect(TikaInputStream.get(bis), metadata);
+ try {
+ return detector.detectMimeType(path, bis);
+ } catch (GetBytesException e) {
+ throw new IOException(e);
+ }
}
}