Markdown: optionally allow limited <iframe> tags

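Allow the Gitiles administrator to set markdown.allowiframe to
a list of http:// or https:// URL prefixes that are considered
trustworthy enough to be embedded inside of iframes within the
markdown served by this Gitiles instance. A prefix must end with
"/" to match longer URLs; any other entry must match the src
exactly. A single boolean true value allows any valid URL.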

Implement a new strict parser for the <iframe> element inside of
the markdown extension, pulling out only the src, height, width
and frameborder attributes. Other iframe attributes will cause the
entire element to be recognized as raw HTML and dropped by the
parser and formatter.

Apply strict validation on the src attribute, dropping the iframe
if it is not acceptable.

Change-Id: I7d5decd9f0dbfa2acf1e4f59e571ac5518067d4a
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
index e6d37a0..ed78d4a 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
@@ -131,7 +131,7 @@
       }
 
       res.setHeader(HttpHeaders.ETAG, curEtag);
-      showDoc(req, res, view, nav, doc);
+      showDoc(req, res, view, cfg, nav, doc);
     } finally {
       rw.release();
     }
@@ -164,7 +164,8 @@
   }
 
   private void showDoc(HttpServletRequest req, HttpServletResponse res,
-      GitilesView view, RootNode nav, RootNode doc) throws IOException {
+      GitilesView view, Config cfg,
+      RootNode nav, RootNode doc) throws IOException {
     Map<String, Object> data = new HashMap<>();
     data.putAll(Navbar.bannerSoyData(view, nav));
     data.put("pageTitle", MoreObjects.firstNonNull(
@@ -173,8 +174,8 @@
     data.put("sourceUrl", GitilesView.show().copyFrom(view).toUrl());
     data.put("logUrl", GitilesView.log().copyFrom(view).toUrl());
     data.put("blameUrl", GitilesView.blame().copyFrom(view).toUrl());
-    data.put("navbarHtml", new MarkdownToHtml(view).toSoyHtml(nav));
-    data.put("bodyHtml", new MarkdownToHtml(view).toSoyHtml(doc));
+    data.put("navbarHtml", new MarkdownToHtml(view, cfg).toSoyHtml(nav));
+    data.put("bodyHtml", new MarkdownToHtml(view, cfg).toSoyHtml(doc));
 
     String page = renderer.render(SOY_TEMPLATE, data);
     byte[] raw = page.getBytes(UTF_8);
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesMarkdown.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesMarkdown.java
index 040326b..9557c54 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesMarkdown.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesMarkdown.java
@@ -77,6 +77,7 @@
     return new Rule[] {
         cols(),
         hr(),
+        iframe(),
         note(),
         toc(),
     };
@@ -96,6 +97,35 @@
         push(new SimpleNode(SimpleNode.Type.HRule)));
   }
 
+  /** Matches {@code <iframe>} carrying only src, height, width or frameborder attributes. */
+  public Rule iframe() {
+    StringBuilderVar src = new StringBuilderVar();
+    StringBuilderVar h = new StringBuilderVar();
+    StringBuilderVar w = new StringBuilderVar();
+    StringBuilderVar b = new StringBuilderVar();
+    return NodeSequence(
+        string("<iframe"),
+        oneOrMore(
+          sequence(
+            Spn1(),
+            firstOf(
+              sequence(string("src="), attribute(src)),
+              sequence(string("height="), attribute(h)),
+              sequence(string("width="), attribute(w)),
+              // frameborder has its own buffer so its value never leaks into the width.
+              sequence(string("frameborder="), attribute(b))
+            ))),
+        Spn1(), '>', Spn1(), string("</iframe>"),
+        push(new IframeNode(
+            src.getString(), h.getString(), w.getString(), b.getString())));
+  }
+
+  public Rule attribute(StringBuilderVar var) { // appends the quoted value (without quotes) to var
+    return firstOf(
+      sequence('"', zeroOrMore(testNot('"'), ANY), var.append(match()), '"'), // "..."
+      sequence('\'', zeroOrMore(testNot('\''), ANY), var.append(match()), '\'')); // '...'
+  }
+
   public Rule note() {
     StringBuilderVar body = new StringBuilderVar();
     return NodeSequence(
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/IframeNode.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/IframeNode.java
new file mode 100644
index 0000000..f4d1ca3
--- /dev/null
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/IframeNode.java
@@ -0,0 +1,59 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import com.google.common.base.Strings;
+
+import org.pegdown.ast.AbstractNode;
+import org.pegdown.ast.Node;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * AST node for an {@code <iframe>} element recognized by the markdown parser.
+ * <p>
+ * Holds the raw attribute values; empty height and width are stored as
+ * {@code null}.
+ */
+class IframeNode extends AbstractNode {
+  final String src;
+  final String height;
+  final String width;
+
+  /** False only when the frameborder attribute was exactly {@code "0"}. */
+  final boolean border;
+
+  IframeNode(String src, String height, String width, String border) {
+    boolean hideBorder = "0".equals(border);
+    this.src = src;
+    this.height = Strings.emptyToNull(height);
+    this.width = Strings.emptyToNull(width);
+    this.border = !hideBorder;
+  }
+
+  /** Dispatch to the Gitiles {@link Visitor}; the cast fails for any other visitor type. */
+  @Override
+  public void accept(org.pegdown.ast.Visitor visitor) {
+    Visitor gitilesVisitor = (Visitor) visitor;
+    gitilesVisitor.visit(this);
+  }
+
+  /** An iframe is a leaf node and never has children. */
+  @Override
+  public List<Node> getChildren() {
+    return Collections.<Node>emptyList();
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
index c1c7d4f..f2de56e 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
@@ -22,6 +22,8 @@
 import com.google.template.soy.data.SanitizedContent;
 import com.google.template.soy.shared.restricted.EscapingConventions;
 
+import org.eclipse.jgit.lib.Config;
+import org.eclipse.jgit.util.StringUtils;
 import org.pegdown.ast.AbbreviationNode;
 import org.pegdown.ast.AutoLinkNode;
 import org.pegdown.ast.BlockQuoteNode;
@@ -71,10 +73,12 @@
   private final HtmlBuilder html = new HtmlBuilder();
   private final TocFormatter toc = new TocFormatter(html, 3);
   private final GitilesView view;
+  private final Config cfg;
   private TableState table;
 
-  public MarkdownToHtml(GitilesView view) {
+  public MarkdownToHtml(GitilesView view, Config cfg) { // cfg supplies markdown.allowiframe
     this.view = view;
+    this.cfg = cfg;
   }
 
   /** Render the document AST to sanitized HTML. */
@@ -127,6 +131,50 @@
   }
 
+  /**
+   * Render the iframe only when {@link HtmlBuilder} accepts its src URL and
+   * its height and width, and the src is permitted by the
+   * {@code markdown.allowiframe} configuration; otherwise emit nothing.
+   */
+  @Override
+  public void visit(IframeNode node) {
+    if (HtmlBuilder.isValidHttpUri(node.src)
+        && HtmlBuilder.isValidCssDimension(node.height)
+        && HtmlBuilder.isValidCssDimension(node.width)
+        && canRender(node)) {
+      html.open("iframe")
+          .attribute("src", node.src)
+          .attribute("height", node.height)
+          .attribute("width", node.width);
+      if (!node.border) {
+        html.attribute("class", "noborder");
+      }
+      html.close("iframe");
+    }
+  }
+
+  /**
+   * Check the iframe src against {@code markdown.allowiframe}.
+   *
+   * @param node iframe whose src is being tested.
+   * @return true if the sole configured value is a boolean true, or if any
+   *         configured value equals the src, or ends with {@code "/"} and is
+   *         a prefix of the src.
+   */
+  private boolean canRender(IframeNode node) {
+    String[] ok = cfg.getStringList("markdown", null, "allowiframe");
+    // Compare with equals(): identity (==) on a boxed Boolean is fragile.
+    if (ok.length == 1 && Boolean.TRUE.equals(StringUtils.toBooleanOrNull(ok[0]))) {
+      return true;
+    }
+    for (String m : ok) {
+      if (m.equals(node.src) || (m.endsWith("/") && node.src.startsWith(m))) {
+        return true;
+      }
+    }
+    return false; // By default do not render iframe.
+  }
+
   @Override
   public void visit(HeaderNode node) {
     String tag = "h" + node.getLevel();
     html.open(tag);
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java
index 8e9ea60..e91e073 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/Visitor.java
@@ -17,5 +17,6 @@
 public interface Visitor extends org.pegdown.ast.Visitor {
   void visit(ColsNode node);
   void visit(DivNode node);
+  void visit(IframeNode node); // <iframe> element matched by GitilesMarkdown.iframe()
   void visit(TocNode node);
 }
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
index 093f901..f5dc889 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
@@ -46,7 +46,8 @@
       "a", "div", "img", "p", "blockquote", "pre",
       "ol", "ul", "li", "dl", "dd", "dt",
       "del", "em", "strong", "code", "br", "hr",
-      "table", "thead", "tbody", "caption", "tr", "th", "td"
+      "table", "thead", "tbody", "caption", "tr", "th", "td",
+      "iframe"
   );
 
   private static final ImmutableSet<String> ALLOWED_ATTRIBUTES = ImmutableSet.of(
@@ -58,6 +59,35 @@
   private static final FilterNormalizeUri URI = FilterNormalizeUri.INSTANCE;
   private static final FilterImageDataUri IMAGE_DATA = FilterImageDataUri.INSTANCE;
 
+  /** Positive pixel count without leading zeros, or a percentage from 1% to 100%. */
+  private static final java.util.regex.Pattern CSS_DIMENSION =
+      java.util.regex.Pattern.compile("(?:[1-9][0-9]*px|100%|[1-9][0-9]?%)");
+
+  /**
+   * Check if a value is an acceptable iframe height or width.
+   *
+   * @param val candidate value, for example {@code "250px"} or {@code "80%"}; may be null.
+   * @return true if val is a positive whole number of pixels or a percentage from 1% to 100%.
+   */
+  public static boolean isValidCssDimension(String val) {
+    return val != null && CSS_DIMENSION.matcher(val).matches();
+  }
+
+  /**
+   * Check if a URL starts with {@code http://}, {@code https://} or {@code //}
+   * and is accepted by the URI value filter.
+   *
+   * @param val candidate URL; may be null, which is never valid.
+   * @return true if val is acceptable.
+   */
+  public static boolean isValidHttpUri(String val) {
+    return val != null
+        && (val.startsWith("https://")
+            || val.startsWith("http://")
+            || val.startsWith("//"))
+        && URI.getValueFilter().matcher(val).find();
+  }
+
   /** Check if URL is valid for {@code <img src="data:image/*;base64,...">}. */
   public static boolean isImageDataUri(String url) {
     return IMAGE_DATA.getValueFilter().matcher(url).find();
@@ -89,6 +101,13 @@
       val = anchorHref(val);
     } else if ("src".equals(att) && "img".equals(tag)) {
       val = imgSrc(val);
+    } else if ("src".equals(att) && "iframe".equals(tag)) {
+      if (!isValidHttpUri(val)) {
+        return this;
+      }
+      val = URI.escape(val);
+    } else if (("height".equals(att) || "width".equals(att)) && "iframe".equals(tag)) {
+      val = isValidCssDimension(val) ? val : "250px";
     } else if ("alt".equals(att) && "img".equals(tag)) {
       // allow
     } else if ("title".equals(att) && ("img".equals(tag) || "a".equals(tag))) {
@@ -119,8 +138,7 @@
   }
 
   private static String imgSrc(String val) {
-    if ((val.startsWith("http:") || val.startsWith("https:"))
-        && URI.getValueFilter().matcher(val).find()) {
+    if (isValidHttpUri(val)) {
       return URI.escape(val);
     }
     if (isImageDataUri(val)) {
diff --git a/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css b/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
index 36e10dd..3b4de7d 100644
--- a/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
+++ b/gitiles-servlet/src/main/resources/com/google/gitiles/static/doc.css
@@ -152,6 +152,13 @@
 .doc img {
   border: 0;
 }
+.doc iframe {
+  min-width: 100px;
+  min-height: 30px;
+}
+iframe.noborder { /* class added by MarkdownToHtml when frameborder="0" */
+  border: 0;
+}
 
 .doc pre {
   border: 1px solid silver;