Replace Tika with overviewproject mime-types to detect mime types Use the org.overviewproject:mime-types:0.1.3 library [1] instead of Apache Tika to detect mime-types. This library is a successor of the original project mime-util [2], which seems to be unmaintained. It supports the mime-types supported by shared-mime-info [3]. Compared to Apache Tika, the overviewproject mime-types library has a much smaller scope and only cares about detecting mime-types, whereas Tika provides a lot more functionality which is not needed in the uploadvalidator plugin. The size of the mime-types library is only 58kB compared to 708kB for the tika-core library. [1] https://github.com/overview/mime-types [2] http://sourceforge.net/p/mime-util/ [3] https://gitlab.freedesktop.org/xdg/shared-mime-info Change-Id: Iecdae64b2039bd6717926e1b6dcb1f59b04a593e
diff --git a/BUILD b/BUILD index 2d26f72..6df464f 100644 --- a/BUILD +++ b/BUILD
@@ -13,7 +13,7 @@ ], resources = glob(["src/main/resources/**/*"]), deps = [ - "@tika-core//jar", + "@mime-types//jar", ], ) @@ -21,7 +21,7 @@ TEST_DEPS = PLUGIN_DEPS + PLUGIN_TEST_DEPS + [ "@commons-io//jar", - "@tika-core//jar", + "@mime-types//jar", ":uploadvalidator__plugin", ]
diff --git a/external_plugin_deps.bzl b/external_plugin_deps.bzl index 8d85336..6ef4aa8 100644 --- a/external_plugin_deps.bzl +++ b/external_plugin_deps.bzl
@@ -7,7 +7,7 @@ sha1 = "83b5b8a7ba1c08f9e8c8ff2373724e33d3c1e22a", ) maven_jar( - name = "tika-core", - artifact = "org.apache.tika:tika-core:1.24.1", - sha1 = "703e65fb300d1425d4ad7b68c21c7795bb7a95c3", - ) + name = "mime-types", + artifact = "org.overviewproject:mime-types:0.1.3", + sha1 = "63ebd860cdad2f8a5fec89ae3238970607d943a3", + ) \ No newline at end of file
diff --git a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java index 6e6ad98..bb6f9db 100644 --- a/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java +++ b/src/main/java/com/googlesource/gerrit/plugins/uploadvalidator/ContentTypeUtil.java
@@ -29,11 +29,9 @@ import java.io.InputStream; import java.util.concurrent.ExecutionException; import java.util.regex.Pattern; -import org.apache.tika.detect.DefaultDetector; -import org.apache.tika.detect.Detector; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; import org.eclipse.jgit.lib.ObjectLoader; +import org.overviewproject.mime_types.GetBytesException; +import org.overviewproject.mime_types.MimeTypeDetector; public class ContentTypeUtil { private static final String KEY_BINARY_TYPES = "binaryTypes"; @@ -68,7 +66,7 @@ } private final LoadingCache<String, Pattern> patternCache; - private final Detector detector = new DefaultDetector(); + private final MimeTypeDetector detector = new MimeTypeDetector(); @Inject ContentTypeUtil(@Named(CACHE_NAME) LoadingCache<String, Pattern> patternCache) { @@ -83,9 +81,11 @@ } public String getContentType(InputStream is, String pathname) throws IOException { - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, pathname); - return detector.detect(TikaInputStream.get(is), metadata).toString(); + try { + return detector.detectMimeType(pathname, is); + } catch (GetBytesException e) { + throw new IOException(e); + } } @VisibleForTesting
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md index 4402118..eb47d6a 100644 --- a/src/main/resources/Documentation/config.md +++ b/src/main/resources/Documentation/config.md
@@ -109,7 +109,7 @@ If there is a NUL byte in the first 8k then the file will be considered binary regardless of this setting. - To detect content types [Apache Tika library][2] is used. + To detect content types the [overviewproject mime-types library][2] is used. Content type can be specified as a string, wildcard or a regular expression, for example: @@ -154,8 +154,9 @@ This check does not run on [binary files][4] [1]: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html -[2]: https://tika.apache.org/ -[3]: https://tika.apache.org/1.12/formats.html#Full_list_of_Supported_Formats +[2]: https://github.com/overview/mime-types is a successor of the original + project http://sourceforge.net/p/mime-util/ which seems to be unmaintained +[3]: supports the mime types supported by https://gitlab.freedesktop.org/xdg/shared-mime-info [4]: #binary_type plugin.@PLUGIN@.blockedContentType @@ -164,7 +165,7 @@ This check looks for blocked content types. If the check finds a blocked content type the push will be rejected. - To detect content types [Apache Tika library][2] is used. + To detect content types the [overviewproject mime-types library][2] is used. Content type can be specified as a string, wildcard or a regular expression, for example:
diff --git a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java index eb3820f..040ae6c 100644 --- a/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java +++ b/src/test/java/com/googlesource/gerrit/plugins/uploadvalidator/MimeTypeDetection.java
@@ -16,19 +16,17 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.apache.tika.detect.DefaultDetector; -import org.apache.tika.detect.Detector; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; +import org.overviewproject.mime_types.GetBytesException; +import org.overviewproject.mime_types.MimeTypeDetector; class MimeTypeDetection { public String getMimeType(String path, byte[] content) throws IOException { - Detector detector = new DefaultDetector(); - - Metadata metadata = new Metadata(); - metadata.set(Metadata.RESOURCE_NAME_KEY, path); - + MimeTypeDetector detector = new MimeTypeDetector(); ByteArrayInputStream bis = new ByteArrayInputStream(content); - return detector.detect(TikaInputStream.get(bis), metadata).toString(); + try { + return detector.detectMimeType(path, bis); + } catch (GetBytesException e) { + throw new IOException(e); + } } }