Add support for glue to permit some block HTML in Markdown

Allow a RootedDocServlet created by an integration application to
override the way MarkdownToHtml handles HtmlBlock AST nodes, casting
them into SafeHtml that can be appended directly to the HtmlBuilder.

This allows an integrator to run the user content through an HTML
sanitizer and pass through only the subset it believes to be safe.

The default still drops everything on the floor, and it's not possible
to configure Gitiles to pass through HTML by default.  Application
code changes are required to invoke the RootedDocServlet constructor
that accepts an HtmlSanitizer.Factory.

Change-Id: Id412bc8670d2c25f36c8486e0d3600d1cb9d0710
diff --git a/gitiles-dev/BUILD b/gitiles-dev/BUILD
index e58623d..2552321 100644
--- a/gitiles-dev/BUILD
+++ b/gitiles-dev/BUILD
@@ -5,6 +5,7 @@
     deps = [
         "//gitiles-servlet:servlet",
         "//lib:guava",
+        "//lib:html-types",
         "//lib:servlet-api_3_0",
         "//lib/jetty:server",
         "//lib/jetty:servlet",
diff --git a/gitiles-dev/src/main/java/com/google/gitiles/dev/DevServer.java b/gitiles-dev/src/main/java/com/google/gitiles/dev/DevServer.java
index 35a594f..000b0cb 100644
--- a/gitiles-dev/src/main/java/com/google/gitiles/dev/DevServer.java
+++ b/gitiles-dev/src/main/java/com/google/gitiles/dev/DevServer.java
@@ -18,12 +18,14 @@
 import static com.google.gitiles.GitilesServlet.STATIC_PREFIX;
 
 import com.google.common.base.Strings;
+import com.google.common.html.types.UncheckedConversions;
 import com.google.gitiles.DebugRenderer;
 import com.google.gitiles.GitilesAccess;
 import com.google.gitiles.GitilesServlet;
 import com.google.gitiles.PathServlet;
 import com.google.gitiles.RepositoryDescription;
 import com.google.gitiles.RootedDocServlet;
+import com.google.gitiles.doc.HtmlSanitizer;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
@@ -176,7 +178,17 @@
           }
         };
 
-    return new RootedDocServlet(resolver, new RootedDocAccess(docRepo), renderer);
+    HtmlSanitizer.Factory htmlSanitizer = HtmlSanitizer.DISABLED_FACTORY;
+    if (cfg.getBoolean("markdown", "unsafeAllowUserContentHtmlInDevMode", false)) {
+      log.warn("!!! Allowing unsafe user content HTML in Markdown !!!");
+      htmlSanitizer =
+          request ->
+              rawUnsafeHtml ->
+                  // Yes, this is evil. It's not known the input was safe.
+                  // I'm a development server to test Gitiles, not a cop.
+                  UncheckedConversions.safeHtmlFromStringKnownToSatisfyTypeContract(rawUnsafeHtml);
+    }
+    return new RootedDocServlet(resolver, new RootedDocAccess(docRepo), renderer, htmlSanitizer);
   }
 
   private class RootedDocAccess implements GitilesAccess.Factory {
diff --git a/gitiles-servlet/BUILD b/gitiles-servlet/BUILD
index bc67ebd..142e14c 100644
--- a/gitiles-servlet/BUILD
+++ b/gitiles-servlet/BUILD
@@ -9,6 +9,7 @@
     "//lib:commons-lang3",
     "//lib:gson",
     "//lib:guava",
+    "//lib:html-types",
     "//lib:joda-time",
     "//lib:jsr305",
     "//lib:commonmark",
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/RootedDocServlet.java b/gitiles-servlet/src/main/java/com/google/gitiles/RootedDocServlet.java
index 90c6e07..96329b1 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/RootedDocServlet.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/RootedDocServlet.java
@@ -17,6 +17,7 @@
 import static org.eclipse.jgit.http.server.ServletUtils.ATTRIBUTE_REPOSITORY;
 
 import com.google.gitiles.doc.DocServlet;
+import com.google.gitiles.doc.HtmlSanitizer;
 import java.io.IOException;
 import javax.servlet.ServletConfig;
 import javax.servlet.ServletException;
@@ -48,8 +49,16 @@
       RepositoryResolver<HttpServletRequest> resolver,
       GitilesAccess.Factory accessFactory,
       Renderer renderer) {
+    this(resolver, accessFactory, renderer, HtmlSanitizer.DISABLED_FACTORY);
+  }
+
+  public RootedDocServlet(
+      RepositoryResolver<HttpServletRequest> resolver,
+      GitilesAccess.Factory accessFactory,
+      Renderer renderer,
+      HtmlSanitizer.Factory htmlSanitizer) {
     this.resolver = resolver;
-    docServlet = new DocServlet(accessFactory, renderer);
+    docServlet = new DocServlet(accessFactory, renderer, htmlSanitizer);
   }
 
   @Override
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
index 001b3b2..1a4fce9 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/DocServlet.java
@@ -70,8 +70,16 @@
   // files are automatically hashed as part of the ETag.
   private static final int ETAG_GEN = 5;
 
+  private final HtmlSanitizer.Factory htmlSanitizer;
+
   public DocServlet(GitilesAccess.Factory accessFactory, Renderer renderer) {
+    this(accessFactory, renderer, HtmlSanitizer.DISABLED_FACTORY);
+  }
+
+  public DocServlet(
+      GitilesAccess.Factory accessFactory, Renderer renderer, HtmlSanitizer.Factory htmlSanitizer) {
     super(renderer, accessFactory);
+    this.htmlSanitizer = htmlSanitizer;
   }
 
   @Override
@@ -128,7 +136,8 @@
               .setGitilesView(view)
               .setRequestUri(req.getRequestURI())
               .setReader(reader)
-              .setRootTree(root);
+              .setRootTree(root)
+              .setHtmlSanitizer(htmlSanitizer.create(req));
       Navbar navbar = createNavbar(cfg, fmt, navmd);
       res.setHeader(HttpHeaders.ETAG, curEtag);
       showDoc(req, res, view, fmt, navbar, srcmd);
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesHtmlExtension.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesHtmlExtension.java
index 834f5fd..e92a52f 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesHtmlExtension.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/GitilesHtmlExtension.java
@@ -123,9 +123,6 @@
         return;
       }
     }
-
-    // Discard potentially unsafe HtmlInline.
-    curr.unlink();
   }
 
   private static boolean isAnchorClose(Node n) {
@@ -148,9 +145,6 @@
         }
       }
     }
-
-    // Discard potentially unsafe HtmlBlock.
-    curr.unlink();
   }
 
   private static IframeBlock iframe(String html) {
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/HtmlSanitizer.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/HtmlSanitizer.java
new file mode 100644
index 0000000..0c23ce0
--- /dev/null
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/HtmlSanitizer.java
@@ -0,0 +1,32 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles.doc;
+
+import com.google.common.html.types.SafeHtml;
+import javax.servlet.http.HttpServletRequest;
+
+/** Converts a block of user-supplied HTML into {@link SafeHtml} that is safe to render. */
+public interface HtmlSanitizer {
+  public static final HtmlSanitizer DISABLED = unused -> SafeHtml.EMPTY; // discards every block
+  public static final Factory DISABLED_FACTORY = req -> DISABLED;
+
+  /** Returns the portion of {@code html} deemed safe, or {@link SafeHtml#EMPTY} to drop it. */
+  SafeHtml sanitize(String html);
+
+  /** Factory that creates an {@link HtmlSanitizer} for an individual request. */
+  public interface Factory {
+    HtmlSanitizer create(HttpServletRequest req);
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
index b88f4fd..8bcb919 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/MarkdownToHtml.java
@@ -17,6 +17,7 @@
 import static com.google.gitiles.doc.MarkdownUtil.getInnerText;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.MoreObjects;
 import com.google.common.base.Strings;
 import com.google.gitiles.GitilesView;
 import com.google.gitiles.ThreadSafePrettifyParser;
@@ -81,6 +82,7 @@
     private String filePath;
     private ObjectReader reader;
     private RevTree root;
+    private HtmlSanitizer htmlSanitizer = HtmlSanitizer.DISABLED;
 
     Builder() {}
 
@@ -114,6 +116,11 @@
       return this;
     }
 
+    public Builder setHtmlSanitizer(HtmlSanitizer htmlSanitizer) {
+      this.htmlSanitizer = MoreObjects.firstNonNull(htmlSanitizer, HtmlSanitizer.DISABLED);
+      return this;
+    }
+
     public MarkdownToHtml build() {
       return new MarkdownToHtml(this);
     }
@@ -125,17 +132,23 @@
   private final GitilesView view;
   private final MarkdownConfig config;
   private final String filePath;
+  private final HtmlSanitizer htmlSanitizer;
   private final ImageLoader imageLoader;
   private boolean outputNamedAnchor = true;
 
-  private MarkdownToHtml(Builder b) {
+  protected MarkdownToHtml(Builder b) {
     requestUri = b.requestUri;
     view = b.view;
     config = b.config;
     filePath = b.filePath;
+    htmlSanitizer = b.htmlSanitizer;
     imageLoader = newImageLoader(b);
   }
 
+  protected HtmlBuilder html() {
+    return html;
+  }
+
   private static ImageLoader newImageLoader(Builder b) {
     if (b.reader != null && b.view != null && b.config != null && b.root != null) {
       return new ImageLoader(b.reader, b.view, b.config, b.root);
@@ -501,12 +514,12 @@
 
   @Override
   public void visit(HtmlInline node) {
-    // Discard all HTML.
+    // Discard inline HTML, as it's always partial tags.
   }
 
   @Override
   public void visit(HtmlBlock node) {
-    // Discard all HTML.
+    html.append(htmlSanitizer.sanitize(node.getLiteral()));
   }
 
   private void wrapChildren(String tag, Node node) {
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
index b8f43b0..8235161 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/doc/html/HtmlBuilder.java
@@ -15,10 +15,12 @@
 package com.google.gitiles.doc.html;
 
 import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 
 import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.html.types.SafeHtml;
 import com.google.gitiles.doc.RuntimeIOException;
 import com.google.template.soy.shared.restricted.EscapingConventions.EscapeHtml;
 import com.google.template.soy.shared.restricted.EscapingConventions.FilterImageDataUri;
@@ -256,6 +258,17 @@
     }
   }
 
+  /** Appends an HTML fragment that has already been determined to be safe. */
+  public void append(SafeHtml html) {
+    checkNotNull(html, "SafeHtml");
+    finishActiveTag();
+    try {
+      htmlBuf.append(html.getSafeHtmlString()); // written as-is; no escaping happens here
+    } catch (IOException e) {
+      throw new RuntimeIOException(e);
+    }
+  }
+
   /** Finish the document. */
   public void finish() {
     finishActiveTag();
diff --git a/lib/BUILD b/lib/BUILD
index fe4988e..917f5f0 100644
--- a/lib/BUILD
+++ b/lib/BUILD
@@ -12,6 +12,7 @@
     "cm-autolink",
     "gfm-strikethrough",
     "gfm-tables",
+    "html-types",
     "jsr305",
     "joda-time",
     "servlet-api_2_5",