Linkify commit messages using regexp-based rules

This commit adds a new config option to gitiles.
Similar to Gerrit commentlinks[1], there may be one or more config
sections "commentlink", which will have a 'match' and link name.
If the regular expression as specified in 'match' matches, the part of
the text will be linkified with the 'link' value, which may contain
references to the groups of the matching regular expression.

Until now there was just one regular expression covering both standard
web links as well gerrit ChangeId links. We cannot put the configurable
comment links into that regex as well, because there you can have
references to groups within the regular expression and resolving the
references within one large regular expression would get quite messy.

Therefore this commit introduces a new class CommentLinkInfo,
which will do the actual replacing for one configurable or
the web links or the change ids.

Note: this commit does not bring gerrits 'html' option. This only brings
in 'match' and 'link'.

[1] http://gerrit-documentation.googlecode.com/svn/Documentation/2.5.1/config-gerrit.html#_a_id_commentlink_a_section_commentlink

Change-Id: Ic4d75aa55d648fc492559ff13b1dbebf2a1ff4b5
Signed-off-by: Stefan Beller <sbeller@google.com>
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/CommentLinkInfo.java b/gitiles-servlet/src/main/java/com/google/gitiles/CommentLinkInfo.java
new file mode 100644
index 0000000..536d149
--- /dev/null
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/CommentLinkInfo.java
@@ -0,0 +1,90 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gitiles;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Converts commit message text to soy data in accordance with
+ * a commentlink rule.
+ * <p>
+ * Example:
+ * <pre>
+ *  new CommentLinkInfo(
+ *      Pattern.compile("bug \d+"),
+ *      "http://bugs/$1")
+ *    .linkify("do something nice\n\nbug 5")
+ * </pre>
+ * <p>
+ * returns a list of soy data objects:
+ * <pre>
+ * ImmutableList.of(
+ *   ImmutableMap.of("text", "do something nice\n\n"),
+ *   ImmutableMap.of("text", "bug 5", "url", "http://bugs/5")
+ * )
+ * </pre>
+ */
+public class CommentLinkInfo {
+  private final Pattern pattern;
+  private final String link;
+
+  public CommentLinkInfo(Pattern pattern, String link) {
+    this.pattern = checkNotNull(pattern);
+    this.link = checkNotNull(link);
+  }
+
+  public List<Map<String, String>> linkify(String input) {
+    List<Map<String, String>> parsed = Lists.newArrayList();
+    Matcher m = pattern.matcher(input);
+    int last = 0;
+    while (m.find()) {
+      addText(parsed, input.substring(last, m.start()));
+      String text = m.group(0);
+      addLink(parsed, text, pattern.matcher(text).replaceAll(link));
+      last = m.end();
+    }
+    addText(parsed, input.substring(last));
+    return ImmutableList.copyOf(parsed);
+  }
+
+  private static void addLink(List<Map<String, String>> parts, String text, String url) {
+    parts.add(ImmutableMap.of("text", text, "url", url));
+  }
+
+  private static void addText(List<Map<String, String>> parts, String text) {
+    if (text.isEmpty()) {
+      return;
+    }
+    if (parts.isEmpty()) {
+      parts.add(ImmutableMap.of("text", text));
+    } else {
+      Map<String, String> old = parts.get(parts.size() - 1);
+      if (!old.containsKey("url")) {
+        parts.set(parts.size() - 1, ImmutableMap.of("text", old.get("text") + text));
+      } else {
+        parts.add(ImmutableMap.of("text", text));
+      }
+    }
+  }
+}
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/GitilesFilter.java b/gitiles-servlet/src/main/java/com/google/gitiles/GitilesFilter.java
index 3d548b0..b38c9c6 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/GitilesFilter.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/GitilesFilter.java
@@ -296,7 +296,7 @@
   private synchronized Linkifier linkifier() {
     if (linkifier == null) {
       checkState(urls != null, "GitilesUrls not yet set");
-      linkifier = new Linkifier(urls);
+      linkifier = new Linkifier(urls, config);
     }
     return linkifier;
   }
diff --git a/gitiles-servlet/src/main/java/com/google/gitiles/Linkifier.java b/gitiles-servlet/src/main/java/com/google/gitiles/Linkifier.java
index a9673b4..1a49a41 100644
--- a/gitiles-servlet/src/main/java/com/google/gitiles/Linkifier.java
+++ b/gitiles-servlet/src/main/java/com/google/gitiles/Linkifier.java
@@ -17,11 +17,19 @@
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
+import org.eclipse.jgit.lib.Config;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -29,7 +37,11 @@
 
 /** Linkifier for blocks of text such as commit message descriptions. */
 public class Linkifier {
-  private static final Pattern LINK_PATTERN;
+  private static final Logger log = LoggerFactory.getLogger(Linkifier.class);
+
+  private static final String COMMENTLINK = "commentlink";
+  private static final Pattern HTTP_URL_PATTERN;
+  private static final Pattern CHANGE_ID_PATTERN;
 
   static {
     // HTTP URL regex adapted from com.google.gwtexpui.safehtml.client.SafeHtml.
@@ -41,59 +53,85 @@
         part + "{2,}" +
         "(?:[(]" + part + "*" + "[)])*" +
         part + "*";
-    String changeId = "\\bI[0-9a-f]{8,40}\\b";
-    LINK_PATTERN = Pattern.compile(Joiner.on("|").join(
-        "(" + httpUrl + ")",
-        "(" + changeId + ")"));
+    HTTP_URL_PATTERN = Pattern.compile(httpUrl);
+    CHANGE_ID_PATTERN = Pattern.compile("(\\bI[0-9a-f]{8,40}\\b)");
   }
 
   private final GitilesUrls urls;
+  private final List<CommentLinkInfo> commentLinks;
+  private final Pattern allPattern;
 
-  public Linkifier(GitilesUrls urls) {
+  public Linkifier(GitilesUrls urls, Config config) {
     this.urls = checkNotNull(urls, "urls");
+
+    List<CommentLinkInfo> list = new ArrayList<>();
+    list.add(new CommentLinkInfo(HTTP_URL_PATTERN, "$0"));
+    Set<String> subsections = config.getSubsections(COMMENTLINK);
+
+    List<String> patterns = new ArrayList<>();
+
+    patterns.add(HTTP_URL_PATTERN.pattern());
+    patterns.add(CHANGE_ID_PATTERN.pattern());
+
+    for (String subsection : subsections) {
+      String match = config.getString("commentlink", subsection, "match");
+      String link = config.getString("commentlink", subsection, "link");
+      String html = config.getString("commentlink", subsection, "html");
+      if (html != null) {
+        log.warn("Beware: html in commentlinks is unsupported in gitiles; "
+               + "Did you copy it from a gerrit config?");
+      }
+      if (Strings.isNullOrEmpty(match)) {
+        log.warn("invalid commentlink.%s.match", subsection);
+        continue;
+      }
+      if (Strings.isNullOrEmpty(link)) {
+        log.warn("invalid commentlink.%s.link", subsection);
+        continue;
+      }
+      Pattern pattern = Pattern.compile(match);
+      list.add(new CommentLinkInfo(pattern, link));
+      patterns.add(match);
+    }
+    this.commentLinks = Collections.unmodifiableList(list);
+    allPattern = Pattern.compile(Joiner.on('|').join(patterns));
   }
 
   public List<Map<String, String>> linkify(HttpServletRequest req, String message) {
+
+    List<CommentLinkInfo> operationalCommentLinks = new ArrayList<>(commentLinks);
+    // Because we're relying on 'req' as a dynamic parameter, we need to construct
+    // the CommentLinkInfo for ChangeIds on the fly.
     String baseGerritUrl = urls.getBaseGerritUrl(req);
+
+    if (baseGerritUrl != null) {
+      CommentLinkInfo changeIds =
+          new CommentLinkInfo(CHANGE_ID_PATTERN, baseGerritUrl + "#/q/$0,n,z");
+      operationalCommentLinks.add(changeIds);
+    }
+
     List<Map<String, String>> parsed = Lists.newArrayList();
-    Matcher m = LINK_PATTERN.matcher(message);
-    int last = 0;
-    while (m.find()) {
-      addText(parsed, message.substring(last, m.start()));
-      if (m.group(1) != null) {
-        // Bare URL.
-        parsed.add(link(m.group(1), m.group(1)));
-      } else if (m.group(2) != null) {
-        if (baseGerritUrl != null) {
-          // Gerrit Change-Id.
-          parsed.add(link(m.group(2), baseGerritUrl + "#/q/" + m.group(2) + ",n,z"));
-        } else {
-          addText(parsed, m.group(2));
+    parsed.add(ImmutableMap.of("text", message));
+
+    for (int index = 0; index < parsed.size(); index++) {
+      if (parsed.get(index).get("url") != null) {
+        continue;
+      }
+      Matcher m = allPattern.matcher(parsed.get(index).get("text"));
+      if (!m.find()) {
+        continue;
+      }
+
+      for (CommentLinkInfo cli : operationalCommentLinks) {
+        // No need to apply more rules if this is already a link.
+        if (parsed.get(index).get("url") != null) {
+          break;
         }
+        String text = parsed.get(index).get("text");
+        parsed.remove(index);
+        parsed.addAll(index, cli.linkify(text));
       }
-      last = m.end();
     }
-    addText(parsed, message.substring(last));
     return parsed;
   }
-
-  private static Map<String, String> link(String text, String url) {
-    return ImmutableMap.of("text", text, "url", url);
-  }
-
-  private static void addText(List<Map<String, String>> parts, String text) {
-    if (text.isEmpty()) {
-      return;
-    }
-    if (parts.isEmpty()) {
-      parts.add(ImmutableMap.of("text", text));
-    } else {
-      Map<String, String> old = parts.get(parts.size() - 1);
-      if (!old.containsKey("url")) {
-        parts.set(parts.size() - 1, ImmutableMap.of("text", old.get("text") + text));
-      } else {
-        parts.add(ImmutableMap.of("text", text));
-      }
-    }
-  }
 }
diff --git a/gitiles-servlet/src/test/java/com/google/gitiles/LinkifierTest.java b/gitiles-servlet/src/test/java/com/google/gitiles/LinkifierTest.java
index fa70bc6..9a04f9d 100644
--- a/gitiles-servlet/src/test/java/com/google/gitiles/LinkifierTest.java
+++ b/gitiles-servlet/src/test/java/com/google/gitiles/LinkifierTest.java
@@ -19,6 +19,7 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 
+import org.eclipse.jgit.lib.Config;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
@@ -32,14 +33,16 @@
 
   @Test
   public void linkifyMessageNoMatch() throws Exception {
-    Linkifier l = new Linkifier(TestGitilesUrls.URLS);
+    Config config = new Config();
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
     assertEquals(ImmutableList.of(ImmutableMap.of("text", "some message text")),
         l.linkify(FakeHttpServletRequest.newRequest(), "some message text"));
   }
 
   @Test
   public void linkifyMessageUrl() throws Exception {
-    Linkifier l = new Linkifier(TestGitilesUrls.URLS);
+    Config config = new Config();
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
     assertEquals(ImmutableList.of(
         ImmutableMap.of("text", "http://my/url", "url", "http://my/url")),
         l.linkify(REQ, "http://my/url"));
@@ -62,6 +65,7 @@
 
   @Test
   public void linkifyMessageChangeIdNoGerrit() throws Exception {
+    Config config = new Config();
     Linkifier l = new Linkifier(new GitilesUrls() {
       @Override
       public String getBaseGerritUrl(HttpServletRequest req) {
@@ -77,7 +81,7 @@
       public String getBaseGitUrl(HttpServletRequest req) {
         throw new UnsupportedOperationException();
       }
-    });
+    }, config);
     assertEquals(ImmutableList.of(ImmutableMap.of("text", "I0123456789")),
         l.linkify(REQ, "I0123456789"));
     assertEquals(ImmutableList.of(ImmutableMap.of("text", "Change-Id: I0123456789")),
@@ -88,7 +92,8 @@
 
   @Test
   public void linkifyMessageChangeId() throws Exception {
-    Linkifier l = new Linkifier(TestGitilesUrls.URLS);
+    Config config = new Config();
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
     assertEquals(ImmutableList.of(
         ImmutableMap.of("text", "I0123456789",
           "url", "http://test-host-review/foo/#/q/I0123456789,n,z")),
@@ -107,8 +112,25 @@
   }
 
   @Test
+  public void linkifyMessageCommentLinks() throws Exception {
+    Config config = new Config();
+    config.setString("commentlink", "buglink", "match", "(bug\\s+#?)(\\d+)");
+    config.setString("commentlink", "buglink", "link", "http://bugs/$2");
+    config.setString("commentlink", "featurelink", "match", "(Feature:\\s+)(\\d+)");
+    config.setString("commentlink", "featurelink", "link", "http://features/$2");
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
+    assertEquals(ImmutableList.of(
+        ImmutableMap.of("text", "There is a new "),
+        ImmutableMap.of("text", "Feature: 103", "url", "http://features/103"),
+        ImmutableMap.of("text", ", which is similar to the reported "),
+        ImmutableMap.of("text", "bug 100", "url", "http://bugs/100")),
+        l.linkify(REQ, "There is a new Feature: 103, which is similar to the reported bug 100"));
+  }
+
+  @Test
   public void linkifyMessageUrlAndChangeId() throws Exception {
-    Linkifier l = new Linkifier(TestGitilesUrls.URLS);
+    Config config = new Config();
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
     assertEquals(ImmutableList.of(
         ImmutableMap.of("text", "http://my/url/I0123456789", "url", "http://my/url/I0123456789"),
         ImmutableMap.of("text", " is not change "),
@@ -119,7 +141,8 @@
 
   @Test
   public void linkifyAmpersand() throws Exception {
-    Linkifier l = new Linkifier(TestGitilesUrls.URLS);
+    Config config = new Config();
+    Linkifier l = new Linkifier(TestGitilesUrls.URLS, config);
     assertEquals(ImmutableList.of(
         ImmutableMap.of("text", "http://my/url?a&b", "url", "http://my/url?a&b")),
         l.linkify(REQ, "http://my/url?a&b"));