Markdown: inline images with data:image/*;base64,...

To guard against cross-site scripting attacks, Gitiles does not
support serving raw files to browsers.

Allow markdown document authors to display images contained inside
the Git repository by converting image files to a data:image/*;base64
URI that is embedded directly into the output HTML.

markdown.imageLimit defines an upper bound, in bytes, on the size of a
source image file that will be embedded into the document (default
256 KiB). Embedded images are disabled if set to 0 or less.

Change-Id: I76651221e76f909eb4f088ca222f1105a6871e35
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
index ed78d4a..188d503 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
@@ -130,8 +130,15 @@
         }
       }
 
+      int imageLimit = cfg.getInt("markdown", "imageLimit", 256 << 10);
+      ImageLoader img = null;
+      if (imageLimit > 0) {
+        img = new ImageLoader(rw.getObjectReader(), view,
+            root, srcmd.path, imageLimit);
+      }
+
       res.setHeader(HttpHeaders.ETAG, curEtag);
-      showDoc(req, res, view, cfg, nav, doc);
+      showDoc(req, res, view, cfg, img, nav, doc);
     } finally {
       rw.release();
     }
@@ -164,10 +171,10 @@
   }
 
   private void showDoc(HttpServletRequest req, HttpServletResponse res,
-      GitilesView view, Config cfg,
+      GitilesView view, Config cfg, ImageLoader img,
       RootNode nav, RootNode doc) throws IOException {
     Map<String, Object> data = new HashMap<>();
-    data.putAll(Navbar.bannerSoyData(view, nav));
+    data.putAll(Navbar.bannerSoyData(view, img, nav));
     data.put("pageTitle", MoreObjects.firstNonNull(
         MarkdownUtil.getTitle(doc),
         view.getPathPart()));
@@ -175,7 +182,9 @@
     data.put("logUrl", GitilesView.log().copyFrom(view).toUrl());
     data.put("blameUrl", GitilesView.blame().copyFrom(view).toUrl());
     data.put("navbarHtml", new MarkdownToHtml(view, cfg).toSoyHtml(nav));
-    data.put("bodyHtml", new MarkdownToHtml(view, cfg).toSoyHtml(doc));
+    data.put("bodyHtml", new MarkdownToHtml(view, cfg)
+        .setImageLoader(img)
+        .toSoyHtml(doc));
 
     String page = renderer.render(SOY_TEMPLATE, data);
     byte[] raw = page.getBytes(UTF_8);
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/ImageLoader.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/ImageLoader.java
new file mode 100644
index 0000000..11c5e2f
--- /dev/null
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/ImageLoader.java
@@ -0,0 +1,165 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.io.BaseEncoding;
+import com.google.gitiles.GitilesView;
+import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri;
+
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.revwalk.RevTree;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Locale;
+
+/**
+ * Reads an image from Git and converts to {@code data:image/*;base64,...}
+ *
+ * <p>Image paths are resolved inside the tree given to the constructor. Any
+ * image that cannot be embedded is replaced with the innocuous output of
+ * {@link FilterImageDataUri}.
+ */
+public class ImageLoader {
+  private static final Logger log = LoggerFactory.getLogger(ImageLoader.class);
+
+  /** Embeddable file extensions (lower case, no dot) and their MIME types. */
+  private static final ImmutableMap<String, String> TYPES = ImmutableMap.of(
+      "png", "image/png",
+      "gif", "image/gif",
+      "jpg", "image/jpeg",
+      "jpeg", "image/jpeg");
+
+  private final ObjectReader reader;
+  private final GitilesView view;
+  private final RevTree root;
+  private final String path;
+  private final int imageLimit;
+
+  /**
+   * @param reader reader used to look up and open image blobs.
+   * @param view current view; its repository name appears in error logs.
+   * @param root tree that image paths are resolved within.
+   * @param path path of the document referencing the images; relative image
+   *     paths are resolved against its directory.
+   * @param maxImageSize largest image, in bytes, that will be embedded.
+   */
+  public ImageLoader(ObjectReader reader, GitilesView view,
+      RevTree root, String path, int maxImageSize) {
+    this.reader = reader;
+    this.view = view;
+    this.root = root;
+    this.path = path;
+    this.imageLimit = maxImageSize;
+  }
+
+  /**
+   * Convert an image reference from a document into a data URI.
+   *
+   * @param src image path as written in the document. A leading {@code /}
+   *     resolves from the root tree; anything else resolves relative to the
+   *     directory of the document, each leading {@code ../} moving up one
+   *     directory.
+   * @return {@code data:} URI holding the image, or the innocuous output if
+   *     the path climbs above the root tree or the image cannot be embedded.
+   */
+  String loadImage(String src) {
+    if (src.startsWith("/")) {
+      return readAndBase64Encode(src.substring(1));
+    }
+
+    String base = directory();
+    while (src.startsWith("../")) {
+      if (base.isEmpty()) {
+        // Already at the root tree; do not allow climbing above it.
+        return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+      }
+      // A non-empty base always ends with '/'. Search for the separator
+      // before that trailing one so the last directory is really removed;
+      // s is -1 when base has a single segment, leaving base empty.
+      int s = base.lastIndexOf('/', base.length() - 2);
+      base = base.substring(0, s + 1);
+      src = src.substring("../".length());
+    }
+    return readAndBase64Encode(base + src);
+  }
+
+  /** @return directory of the document with a trailing '/', or "" if none. */
+  private String directory() {
+    int s = path.lastIndexOf('/');
+    if (s > 0) {
+      return path.substring(0, s + 1);
+    }
+    return "";
+  }
+
+  /**
+   * Read a blob from the root tree and encode it as a data URI.
+   *
+   * @param imagePath path of the image within the root tree.
+   * @return {@code data:} URI, or the innocuous output if the extension is
+   *     not a known image type, no regular file is found at the path, the
+   *     image is larger than the limit, or the repository cannot be read.
+   */
+  private String readAndBase64Encode(String imagePath) {
+    String type = getMimeType(imagePath);
+    if (type == null) {
+      return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+    }
+
+    try {
+      TreeWalk tw = TreeWalk.forPath(reader, imagePath, root);
+      if (tw == null || tw.getFileMode(0) != FileMode.REGULAR_FILE) {
+        return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+      }
+
+      ObjectId id = tw.getObjectId(0);
+      byte[] raw = reader.open(id, Constants.OBJ_BLOB).getCachedBytes(imageLimit);
+      if (raw.length > imageLimit) {
+        return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+      }
+
+      return "data:" + type + ";base64," + BaseEncoding.base64().encode(raw);
+    } catch (LargeObjectException.ExceedsLimit e) {
+      return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+    } catch (IOException e) {
+      log.error(String.format("cannot read repo %s image %s from %s",
+          view.getRepositoryName(), imagePath, root.name()), e);
+      return FilterImageDataUri.INSTANCE.getInnocuousOutput();
+    }
+  }
+
+  /**
+   * @return MIME type for the extension of {@code path}, or null if there is
+   *     no extension or it is not an embeddable image type.
+   */
+  private static String getMimeType(String path) {
+    int d = path.lastIndexOf('.');
+    if (d == -1) {
+      return null;
+    }
+    String ext = path.substring(d + 1);
+    // Locale.ROOT keeps the lookup independent of the server's default
+    // locale (e.g. "GIF" must lower-case to "gif" under a Turkish locale).
+    return TYPES.get(ext.toLowerCase(Locale.ROOT));
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
index f2de56e..62bc846 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
@@ -20,7 +20,7 @@
 import com.google.gitiles.GitilesView;
 import com.google.gitiles.doc.html.HtmlBuilder;
 import com.google.template.soy.data.SanitizedContent;
-import com.google.template.soy.shared.restricted.EscapingConventions;
+import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri;
 
 import org.eclipse.jgit.lib.Config;
 import org.eclipse.jgit.util.StringUtils;
@@ -74,6 +74,7 @@
   private final TocFormatter toc = new TocFormatter(html, 3);
   private final GitilesView view;
   private final Config cfg;
+  private ImageLoader imageLoader;
   private TableState table;
 
   public MarkdownToHtml(GitilesView view, Config cfg) {
@@ -81,6 +82,11 @@
     this.cfg = cfg;
   }
 
+  public MarkdownToHtml setImageLoader(ImageLoader img) {
+    imageLoader = img;
+    return this;
+  }
+
   /** Render the document AST to sanitized HTML. */
   public SanitizedContent toSoyHtml(RootNode node) {
     if (node == null) {
@@ -303,7 +309,7 @@
   @Override
   public void visit(ExpImageNode node) {
     html.open("img")
-        .attribute("src", node.url)
+        .attribute("src", resolveImageUrl(node.url))
         .attribute("title", node.title)
         .attribute("alt", getInnerText(node));
   }
@@ -314,11 +320,11 @@
     String url, title = alt;
     ReferenceNode ref = references.get(node.referenceKey, alt);
     if (ref != null) {
-      url = ref.getUrl();
+      url = resolveImageUrl(ref.getUrl());
       title = ref.getTitle();
     } else {
       // If reference is missing, insert a broken image.
-      url = EscapingConventions.FilterImageDataUri.INSTANCE.getInnocuousOutput();
+      url = FilterImageDataUri.INSTANCE.getInnocuousOutput();
     }
     html.open("img")
         .attribute("src", url)
@@ -326,6 +332,15 @@
         .attribute("alt", alt);
   }
 
+  private String resolveImageUrl(String url) {
+    if (imageLoader == null
+        || url.startsWith("https://") || url.startsWith("http://")
+        || url.startsWith("data:")) {
+      return url;
+    }
+    return imageLoader.loadImage(url);
+  }
+
   @Override
   public void visit(TableNode node) {
     table = new TableState(node);
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/Navbar.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Navbar.java
index 1f54845..cec14b8 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/Navbar.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Navbar.java
@@ -17,6 +17,7 @@
 import com.google.gitiles.GitilesView;
 import com.google.gitiles.doc.html.HtmlBuilder;
 import com.google.template.soy.shared.restricted.Sanitizers;
+import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri;
 
 import org.pegdown.ast.HeaderNode;
 import org.pegdown.ast.Node;
@@ -28,7 +29,9 @@
 import java.util.Map;
 
 class Navbar {
-  static Map<String, Object> bannerSoyData(GitilesView view, RootNode nav) {
+  static Map<String, Object> bannerSoyData(
+      GitilesView view, ImageLoader img,
+      RootNode nav) {
     Map<String, Object> data = new HashMap<>();
     data.put("siteTitle", null);
     data.put("logoUrl", null);
@@ -55,10 +58,14 @@
       String url = r.getUrl();
       if ("logo".equalsIgnoreCase(key)) {
         Object src;
-        if (HtmlBuilder.isImageDataUri(url)) {
-          src = Sanitizers.filterImageDataUri(url);
-        } else {
+        if (HtmlBuilder.isValidHttpUri(url)) {
           src = url;
+        } else if (HtmlBuilder.isImageDataUri(url)) {
+          src = Sanitizers.filterImageDataUri(url);
+        } else if (img != null) {
+          src = img.loadImage(url);
+        } else {
+          src = FilterImageDataUri.INSTANCE.getInnocuousOutput();
         }
         data.put("logoUrl", src);
       } else if ("home".equalsIgnoreCase(key)) {
diff --git a/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css b/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
index 3b4de7d..9272763 100644
--- a/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
+++ b/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
@@ -151,6 +151,7 @@
 
 .doc img {
   border: 0;
+  max-width: 100%;
 }
 .doc iframe {
   min-width: 100px;