Include the mime-util library to guess file MIME types
The mime-util project is an open source, Apache License 2.0 library providing
support for MIME content type detection on files through both
the /etc/mime.types and /etc/magic.mime file formats. We can use
it within CatServlet to determine what sort of file we are about
to serve to the client, so the client gets a proper Content-Type
header in the HTTP response.
Reviewed-by: Cedric Beust <cbeust@google.com>
Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/Documentation/licenses.txt b/Documentation/licenses.txt
index 4a4b197..71cca10 100644
--- a/Documentation/licenses.txt
+++ b/Documentation/licenses.txt
@@ -29,6 +29,7 @@
OpenXRI <<apache2,Apache License 2.0>>
Neko HTML <<apache2,Apache License 2.0>>
Ehcache <<apache2,Apache License 2.0>>
+mime-util <<apache2,Apache License 2.0>>
ICU4J <<icu4j,ICU4J License>>
JGit <<jgit,New-Style BSD>>
JSch <<sshd,New-Style BSD>>
diff --git a/pom.xml b/pom.xml
index 98c9841..c922c94 100644
--- a/pom.xml
+++ b/pom.xml
@@ -543,6 +543,12 @@
</dependency>
<dependency>
+ <groupId>eu.medsea.mimeutil</groupId>
+ <artifactId>mime-util</artifactId>
+ <version>2.1.2</version>
+ </dependency>
+
+ <dependency>
<groupId>bouncycastle</groupId>
<artifactId>bcpg-jdk15</artifactId>
<version>140</version>
diff --git a/src/main/java/com/google/gerrit/server/CatServlet.java b/src/main/java/com/google/gerrit/server/CatServlet.java
index 024cb04..2f3839c 100644
--- a/src/main/java/com/google/gerrit/server/CatServlet.java
+++ b/src/main/java/com/google/gerrit/server/CatServlet.java
@@ -28,6 +28,8 @@
import com.google.gwtjsonrpc.server.XsrfException;
import com.google.gwtorm.client.OrmException;
+import eu.medsea.mimeutil.MimeType;
+
import org.spearce.jgit.lib.Constants;
import org.spearce.jgit.lib.ObjectId;
import org.spearce.jgit.lib.Repository;
@@ -61,10 +63,10 @@
*/
@SuppressWarnings("serial")
public class CatServlet extends HttpServlet {
- private static final String APPLICATION_OCTET_STREAM =
- "application/octet-stream";
+ private static final MimeType ZIP = new MimeType("application/zip");
private GerritServer server;
private SecureRandom rng;
+ private FileTypeRegistry registry;
@Override
public void init(final ServletConfig config) throws ServletException {
@@ -77,6 +79,7 @@
throw new ServletException("Cannot load GerritServer", e);
}
rng = new SecureRandom();
+ registry = FileTypeRegistry.getInstance();
}
@Override
@@ -219,11 +222,11 @@
}
final long when = fromCommit.getCommitTime() * 1000L;
- String contentType = guessContentType(project, path, blobData);
+ MimeType contentType = registry.getMimeType(path, blobData);
final String fn;
final byte[] outData;
- if (isSafeInline(contentType)) {
+ if (registry.isSafeInline(contentType)) {
fn = safeFileName(path, suffix);
outData = blobData;
@@ -248,11 +251,11 @@
zo.close();
outData = zip.toByteArray();
- contentType = "application/zip";
+ contentType = ZIP;
fn = safeFileName(path, suffix) + ".zip";
}
- rsp.setContentType(contentType);
+ rsp.setContentType(contentType.toString());
rsp.setContentLength(outData.length);
rsp.setDateHeader("Last-Modified", when);
rsp.setHeader("Content-Disposition", "attachment; filename=\"" + fn + "\"");
@@ -262,27 +265,6 @@
rsp.getOutputStream().write(outData);
}
- private String guessContentType(final Project project, final String path,
- final byte[] content) {
- // When in doubt, call it a generic binary stream.
- //
- return APPLICATION_OCTET_STREAM;
- }
-
- private boolean isSafeInline(final String contentType) {
- if (APPLICATION_OCTET_STREAM.equals(contentType)) {
- // Most browsers perform content type sniffing when they get told
- // a generic content type. This is bad, so assume we cannot send
- // the file inline.
- //
- return false;
- }
-
- // Assume we cannot send the content inline.
- //
- return false;
- }
-
private static String safeFileName(String fileName, final String suffix) {
// Convert a file path (e.g. "src/Init.c") to a safe file name with
// no meta-characters that might be unsafe on any given platform.
diff --git a/src/main/java/com/google/gerrit/server/FileTypeRegistry.java b/src/main/java/com/google/gerrit/server/FileTypeRegistry.java
new file mode 100644
index 0000000..2545aaa
--- /dev/null
+++ b/src/main/java/com/google/gerrit/server/FileTypeRegistry.java
@@ -0,0 +1,142 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.server;
+
+import eu.medsea.mimeutil.MimeException;
+import eu.medsea.mimeutil.MimeType;
+import eu.medsea.mimeutil.MimeUtil2;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/** Global registry that guesses file MIME types using mime-util. */
+public class FileTypeRegistry {
+  private static final Logger log =
+      LoggerFactory.getLogger(FileTypeRegistry.class);
+  private static final FileTypeRegistry INSTANCE = new FileTypeRegistry();
+
+  /** Get the global registry. */
+  public static FileTypeRegistry getInstance() {
+    return INSTANCE;
+  }
+
+  private final MimeUtil2 mimeUtil;
+
+  private FileTypeRegistry() {
+    mimeUtil = new MimeUtil2();
+    register("eu.medsea.mimeutil.detector.ExtensionMimeDetector");
+    register("eu.medsea.mimeutil.detector.MagicMimeMimeDetector");
+    if (isWin32()) {
+      register("eu.medsea.mimeutil.detector.WindowsRegistryMimeDetector");
+    }
+  }
+
+  private void register(String name) {
+    mimeUtil.registerMimeDetector(name);
+  }
+
+  /** @return true if the {@code os.name} system property contains "windows". */
+  private static boolean isWin32() {
+    final String osDotName =
+        AccessController.doPrivileged(new PrivilegedAction<String>() {
+          public String run() {
+            return System.getProperty("os.name");
+          }
+        });
+    // Use a fixed locale so the match never depends on the default locale.
+    return osDotName != null && osDotName
+        .toLowerCase(java.util.Locale.ROOT).indexOf("windows") != -1;
+  }
+
+  /**
+   * Get the most specific MIME type available for a file.
+   *
+   * @param path name of the file. The base name (component after the last '/')
+   *        may be used to help determine the MIME type, such as by examining
+   *        the extension (portion after the last '.' if present).
+   * @param content the complete file content. If non-null the content may be
+   *        used to guess the MIME type by examining the beginning for common
+   *        file headers.
+   * @return the MIME type for this content. If the MIME type is not recognized
+   *         or cannot be determined, {@link MimeUtil2#UNKNOWN_MIME_TYPE} is
+   *         returned, which is an alias for {@code application/octet-stream}.
+   */
+  public MimeType getMimeType(final String path, final byte[] content) {
+    Set<MimeType> mimeTypes = new HashSet<MimeType>();
+    if (content != null && content.length > 0) {
+      try {
+        mimeTypes.addAll(mimeUtil.getMimeTypes(content));
+      } catch (MimeException e) {
+        log.warn("Unable to determine MIME type from content", e);
+      }
+    }
+    try {
+      mimeTypes.addAll(mimeUtil.getMimeTypes(path));
+    } catch (MimeException e) {
+      log.warn("Unable to determine MIME type from path", e);
+    }
+
+    if (isUnknownType(mimeTypes)) {
+      return MimeUtil2.UNKNOWN_MIME_TYPE;
+    }
+
+    final List<MimeType> types = new ArrayList<MimeType>(mimeTypes);
+    Collections.sort(types, new Comparator<MimeType>() {
+      @Override
+      public int compare(MimeType a, MimeType b) {
+        // Most specific first. Compare, don't subtract: no int overflow.
+        final int sa = a.getSpecificity();
+        final int sb = b.getSpecificity();
+        return sa == sb ? 0 : (sa < sb ? 1 : -1);
+      }
+    });
+    return types.get(0);
+  }
+
+  /**
+   * Is this content type safe to transmit to a browser directly?
+   *
+   * @param contentType the MIME type of the file content.
+   * @return true if the Gerrit administrator wants to permit this content to be
+   *         served as-is; false if the administrator does not trust this
+   *         content type and wants it to be protected (typically by wrapping
+   *         the data in a ZIP archive).
+   */
+  public boolean isSafeInline(final MimeType contentType) {
+    if (MimeUtil2.UNKNOWN_MIME_TYPE.equals(contentType)) {
+      // Most browsers sniff the content when told a generic content type.
+      // This is bad, so assume we cannot send the file inline.
+      return false;
+    }
+    // Assume we cannot send the content inline.
+    return false;
+  }
+
+  /** True when nothing was detected, or only the generic unknown type was. */
+  private static boolean isUnknownType(Collection<MimeType> mimeTypes) {
+    return mimeTypes.isEmpty() || (mimeTypes.size() == 1
+        && mimeTypes.contains(MimeUtil2.UNKNOWN_MIME_TYPE));
+  }
+}
diff --git a/src/main/java/log4j.properties b/src/main/java/log4j.properties
index 27d6547..a99be50 100644
--- a/src/main/java/log4j.properties
+++ b/src/main/java/log4j.properties
@@ -24,6 +24,10 @@
log4j.logger.org.apache.sshd.server=WARN
log4j.logger.org.apache.sshd.common.keyprovider.FileKeyPairProvider=INFO
+# Silence non-critical messages from mime-util.
+#
+log4j.logger.eu.medsea.mimeutil=WARN
+
# Silence non-critical messages from openid4java
#
log4j.logger.org.apache.xml=WARN