Render blobs with server-side syntax highlighting

The client-side prettify.js is appealingly simple but does not
perform well on large files, particularly with blame, and can't be
made to work with layouts other than the simplest <ol>. Replace it
with a server-side implementation, using a straight Java port of
prettify.js[1].

The implementation is still very much Soy-based with a few small
optimizations to avoid unnecessary object allocation and copying in
inner loops, e.g. using SoyMapData directly instead of constructing
ImmutableMaps just to have Soy convert them internally. As a rough
benchmark, on my MacBook Air it can render a 750-line file in about
150ms and a 4500-line file in about 250ms. (These figures count only
the rendering step, which runs after syntax highlighting.)

This approach was compared with two other implementations.

The first implementation wrote the whole <ol> directly to the
OutputStream with no allocations. This was very fast: about 10ms for
the same 750-line file. Unfortunately, this approach would be much
harder to make work with a table-based blame layout; the HTML in the
blame-region half of the table is complicated enough that we would
really like to use a template engine for it.

The second implementation built the contents of each <li> with as few
allocations as possible (e.g. reusing a single StringBuilder), then
passed the list of lines with pre-rendered spans to Soy to be rendered
in a list. This approach would work with table-based blame, since the
content of each <li> in the blob view would become the content of a
<td> in the blame view. This was only about 10-20% faster than the
full Soy-based approach, so it was judged not worth the additional
code complexity.

It is also worth noting the client-side cost. Rendering the 750-line
file after receiving the content takes my Chrome about 250ms (180ms
in "Recalculate Style"), and about 400ms for 4500 lines. This bounds
the benefit of any server-side improvements: no matter what, there
will still be a visible pause when loading even a moderately complex
file.

[1] https://code.google.com/p/java-prettify/

Change-Id: I3d81c40e05d38c2c9431bc1c61ba59618756574e
diff --git a/gitiles-servlet/BUCK b/gitiles-servlet/BUCK
index a660aac..e10616c 100644
--- a/gitiles-servlet/BUCK
+++ b/gitiles-servlet/BUCK
@@ -10,6 +10,7 @@
   '//lib:joda-time',
   '//lib:gson',
   '//lib:commons-lang',
+  '//lib:prettify',
   '//lib/jgit:jgit',
   '//lib/jgit:jgit-servlet',
   '//lib/slf4j:slf4j-api',
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/BlobSoyData.java b/gitiles-servlet/src/main/java/com/google/gitiles/BlobSoyData.java
index 5c8d65f..ab5d89e 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/BlobSoyData.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/BlobSoyData.java
@@ -14,9 +14,13 @@
 
 package com.google.gitiles;
 
+import static com.google.common.base.Preconditions.checkState;
 import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
 
+import com.google.common.base.Strings;
 import com.google.common.collect.Maps;
+import com.google.template.soy.data.SoyListData;
+import com.google.template.soy.data.SoyMapData;
 
 import org.eclipse.jgit.diff.RawText;
 import org.eclipse.jgit.errors.LargeObjectException;
@@ -27,7 +31,12 @@
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.util.RawParseUtils;
 
+import prettify.PrettifyParser;
+import prettify.parser.Prettify;
+import syntaxhighlight.ParseResult;
+
 import java.io.IOException;
+import java.util.List;
 import java.util.Map;
 
 /** Soy data converter for git blobs. */
@@ -69,10 +78,10 @@
       content = null;
     }
 
-    data.put("data", content);
     if (content != null) {
-      data.put("lang", guessPrettifyLang(path, content));
+      data.put("lines", prettify(path, content));
     } else if (content == null) {
+      data.put("lines", null);
       data.put("size", Long.toString(loader.getSize()));
     }
     if (path != null && view.getRevision().getPeeledType() == OBJ_COMMIT) {
@@ -82,11 +91,74 @@
     return data;
   }
 
-  private static String guessPrettifyLang(String path, String content) {
+  private static SoyListData prettify(String path, String content) {
+    List<ParseResult> results = new PrettifyParser().parse(extension(path, content), content);
+    SoyListData lines = new SoyListData();
+    SoyListData line = new SoyListData();
+    lines.add(line);
+
+    int last = 0;
+    for (ParseResult r : results) {
+      checkState(r.getOffset() >= last,
+          "out-of-order ParseResult, expected %s >= %s", r.getOffset(), last);
+      line = writeResult(lines, null, content, last, r.getOffset());
+      last = r.getOffset() + r.getLength();
+      line = writeResult(lines, r.getStyleKeysString(), content, r.getOffset(), last);
+    }
+    if (last < content.length()) {
+      writeResult(lines, null, content, last, content.length());
+    }
+    return lines;
+  }
+
+  private static SoyListData writeResult(SoyListData lines, String classes,
+      String s, int start, int end) {
+    SoyListData line = lines.getListData(lines.length() - 1);
+    while (true) {
+      int nl = nextLineBreak(s, start, end);
+      if (nl < 0) {
+        break;
+      }
+      addSpan(line, classes, s, start, nl);
+
+      start = nl + (isCrNl(s, nl) ? 2 : 1);
+      if (start == s.length()) {
+        return null;
+      }
+      line = new SoyListData();
+      lines.add(line);
+    }
+    addSpan(line, classes, s, start, end);
+    return line;
+  }
+
+  private static void addSpan(SoyListData line, String classes, String s, int start, int end) {
+    if (end - start > 0) {
+      if (Strings.isNullOrEmpty(classes)) {
+        classes = Prettify.PR_PLAIN;
+      }
+      line.add(new SoyMapData("classes", classes, "text", s.substring(start, end)));
+    }
+  }
+
+  private static boolean isCrNl(String s, int n) {
+    return s.charAt(n) == '\r' && n != s.length() - 1 && s.charAt(n + 1) == '\n';
+  }
+
+  private static int nextLineBreak(String s, int start, int end) {
+    for (int i = start; i < end; i++) {
+      if (s.charAt(i) == '\n' || s.charAt(i) == '\r') {
+        return i;
+      }
+    }
+    return -1;
+  }
+
+  private static String extension(String path, String content) {
     if (content.startsWith("#!/bin/sh") || content.startsWith("#!/bin/bash")) {
       return "sh";
     } else if (content.startsWith("#!/usr/bin/perl")) {
-      return "perl";
+      return "pl";
     } else if (content.startsWith("#!/usr/bin/python")) {
       return "py";
     } else if (path == null) {
@@ -95,16 +167,16 @@
 
     int slash = path.lastIndexOf('/');
     int dot = path.lastIndexOf('.');
-    String lang = ((0 < dot) && (slash < dot)) ? path.substring(dot + 1) : null;
-    if ("txt".equalsIgnoreCase(lang)) {
+    String ext = ((0 < dot) && (slash < dot)) ? path.substring(dot + 1) : null;
+    if ("txt".equalsIgnoreCase(ext)) {
       return null;
-    } else if ("mk".equalsIgnoreCase(lang)) {
+    } else if ("mk".equalsIgnoreCase(ext)) {
       return "sh";
     } else if ("Makefile".equalsIgnoreCase(path)
         || ((0 < slash) && "Makefile".equalsIgnoreCase(path.substring(slash + 1)))) {
       return "sh";
     } else {
-      return lang;
+      return ext;
     }
   }
 }
diff --git a/gitiles-servlet/src/main/resources/com/google/gitiles/static/gitiles.css b/gitiles-servlet/src/main/resources/com/google/gitiles/static/gitiles.css
index 0dbd477..f3a3a35 100644
--- a/gitiles-servlet/src/main/resources/com/google/gitiles/static/gitiles.css
+++ b/gitiles-servlet/src/main/resources/com/google/gitiles/static/gitiles.css
@@ -399,25 +399,16 @@
 
 /* Override some styles from the default prettify.css. */
 
-/* Line numbers on all lines. */
-li.L0, li.L1, li.L2, li.L3, li.L4, li.L5, li.L6, li.L7, li.L8, li.L9 {
-  list-style-type: decimal;
-}
-
-/* Disable alternating line background color. */
-li.L0, li.L1, li.L2, li.L3, li.L4, li.L5, li.L6, li.L7, li.L8, li.L9 {
-  background: #fff;
-}
-
-pre.git-blob {
+ol.prettyprint {
   border-top: #ddd solid 1px; /* BORDER */
   border-bottom: #ddd solid 1px; /* BORDER */
   border-left: none;
   border-right: none;
-  padding-left: 1em;
+  padding-left: 5em;
   padding-bottom: 5px;
   font-family: monospace;
   font-size: 8pt;
+  white-space: pre;
 }
 pre.prettyprint ol {
   color: grey;
diff --git a/gitiles-servlet/src/main/resources/com/google/gitiles/templates/ObjectDetail.soy b/gitiles-servlet/src/main/resources/com/google/gitiles/templates/ObjectDetail.soy
index 66a0d75..edb3100 100644
--- a/gitiles-servlet/src/main/resources/com/google/gitiles/templates/ObjectDetail.soy
+++ b/gitiles-servlet/src/main/resources/com/google/gitiles/templates/ObjectDetail.soy
@@ -214,8 +214,7 @@
  * @param sha SHA of this file's blob.
  * @param? logUrl optional URL to a log for this file.
  * @param? blameUrl optional URL to a blame for this file.
- * @param data file data (may be empty), or null for a binary file.
- * @param? lang prettyprint language extension for text file.
+ * @param lines lines in the file (may be empty), or null for a binary file.
  * @param? size for binary files only, size in bytes.
  */
 {template .blobDetail}
@@ -231,18 +230,23 @@
 /**
  * Preformatted box containing blob contents.
  *
- * @param data file data (may be empty), or null for a binary file.
- * @param? lang prettyprint language extension for text file.
+ * @param lines lines (may be empty), or null for a binary file. Each line
+ *     is a list of entries with "classes" and "text" fields for pretty-printed
+ *     spans.
  * @param? size for binary files only, size in bytes.
  */
 {template .blobBox}
-  {if $data != null}
-    {if $data}
-      {if $lang}
-        <pre class="git-blob prettyprint linenums lang-{$lang}">{$data}</pre>
-      {else}
-        <pre class="git-blob prettyprint linenums">{$data}</pre>
-      {/if}
+  {if $lines != null}
+    {if $lines}
+      <ol class="prettyprint">
+        {foreach $line in $lines}
+          <li>
+            {foreach $span in $line}
+              <span class="{$span.classes}">{$span.text}</span>
+            {/foreach}
+          </li>
+        {/foreach}
+      </ol>
     {else}
       <div class="file-empty">Empty file</div>
     {/if}
diff --git a/lib/BUCK b/lib/BUCK
index 3faf33f..abed916 100644
--- a/lib/BUCK
+++ b/lib/BUCK
@@ -49,6 +49,13 @@
 )
 
 maven_jar(
+  name = 'prettify',
+  id = 'prettify:java-prettify:1.2.1',
+  sha1 = '29ad8d072f9d0b83d1a2e9aa6ccb0905e6d543c6',
+  repository = GERRIT,
+)
+
+maven_jar(
   name = 'junit',
   id = 'junit:junit:4.11',
   sha1 = '4e031bb61df09069aeb2bffb4019e7a5034a4ee0',