Merge branch 'stable-3.1' into stable-3.2 * stable-3.1: Replace tika by overviewproject mime-types to detect mime types Upgrade bazlets to latest stable-3.0 to build with 3.0.13 API Avoid any uses or lookups of ffmpeg by Tika Change-Id: I4ea7a359bdab7aea526e56ad3ead7bef0846d12b
diff --git a/BUILD b/BUILD index d73e553..d74886f 100644 --- a/BUILD +++ b/BUILD
@@ -12,7 +12,7 @@ ], resources = glob(["src/main/resources/**/*"]), deps = [ - "@tika-core//jar", + "@mime-types//jar", ], ) @@ -20,7 +20,7 @@ TEST_DEPS = PLUGIN_DEPS + PLUGIN_TEST_DEPS + [ "@commons-io//jar", - "@tika-core//jar", + "@mime-types//jar", ":uploadvalidator__plugin", ]
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl index 8d85336..6ef4aa8 100644 --- a/external_plugin_deps.bzl +++ b/external_plugin_deps.bzl
@@ -7,7 +7,7 @@ sha1 = "83b5b8a7ba1c08f9e8c8ff2373724e33d3c1e22a", ) maven_jar( - name = "tika-core", - artifact = "org.apache.tika:tika-core:1.24.1", - sha1 = "703e65fb300d1425d4ad7b68c21c7795bb7a95c3", - ) + name = "mime-types", + artifact = "org.overviewproject:mime-types:0.1.3", + sha1 = "63ebd860cdad2f8a5fec89ae3238970607d943a3", + ) \ No newline at end of file
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java index 67ae289..bb6f9db 100644 --- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java +++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -29,11 +29,9 @@ import java.io.InputStream; import java.util.concurrent.ExecutionException; import java.util.regex.Pattern; -import org.apache.tika.Tika; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; import org.eclipse.jgit.lib.ObjectLoader; +import org.overviewproject.mime_types.GetBytesException; +import org.overviewproject.mime_types.MimeTypeDetector; public class ContentTypeUtil { private static final String KEY_BINARY_TYPES = "binaryTypes"; @@ -68,7 +66,7 @@ } private final LoadingCache<String, Pattern> patternCache; - private final Tika tika = new Tika(TikaConfig.getDefaultConfig()); + private final MimeTypeDetector detector = new MimeTypeDetector(); @Inject ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) { @@ -83,9 +81,11 @@ } public String getContentType(InputStream is, String pathname) throws IOException { - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, pathname); - return tika.detect(TikaInputStream.get(is), metadata); + try { + return detector.detectMimeType(pathname, is); + } catch (GetBytesException e) { + throw new IOException(e); + } } @VisibleForTesting
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md index 5a2e50c..86f2015 100644 --- a/src/main/resources/Documentation/config.md +++ b/src/main/resources/Documentation/config.md
@@ -109,7 +109,7 @@ If there is a NUL byte in the first 8k then the file will be considered binary regardless of this setting. - To detect content types [Apache Tika library][2] is used. + To detect content types the [overviewproject mime-types library][2] is used. Content type can be specified as a string, wildcard or a regular expression, for example: @@ -154,8 +154,9 @@ This check does not run on [binary files][4] [1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html -[2]: https://tika.apache.org/ -[3]: https://tika.apache.org/1.12/formats.html#Full_list_of_Supported_Formats +[2]: https://github.com/overview/mime-types "Successor of the original + project http://sourceforge.net/p/mime-util/, which seems to be unmaintained" +[3]: https://gitlab.freedesktop.org/xdg/shared-mime-info "The mime-types library supports the MIME types defined by shared-mime-info" [4]: #binary_type plugin.@PLUGIN@.blockedContentType @@ -164,7 +165,7 @@ This check looks for blocked content types. If the check finds a blocked content type the push will be rejected. - To detect content types [Apache Tika library][2] is used. + To detect content types the [overviewproject mime-types library][2] is used. Content type can be specified as a string, wildcard or a regular expression, for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java index ac38404..040ae6c 100644 --- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java +++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
@@ -16,19 +16,17 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.apache.tika.Tika; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; +import org.overviewproject.mime_types.GetBytesException; +import org.overviewproject.mime_types.MimeTypeDetector; class MimeTypeDetection { public String getMimeType(String path, byte[] content) throws IOException { - Tika tika = new Tika(TikaConfig.getDefaultConfig()); - - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, path); - + MimeTypeDetector detector = new MimeTypeDetector(); ByteArrayInputStream bis = new ByteArrayInputStream(content); - return tika.detect(TikaInputStream.get(bis), metadata); + try { + return detector.detectMimeType(path, bis); + } catch (GetBytesException e) { + throw new IOException(e); + } } }