Replace a list of RegexFindReplaces in a single pass

Replacing in multiple passes, especially if each different
RegexFindReplaces come from different sources (e.g. users), can lead
to some confusing and likely dangerous behavior. Instead, replace in a
single pass by unioning patterns together and re-exec'ing the patterns
to see which one matched.

Change-Id: If218c9e47b3aca8ff665f1da844279aba3cbcc35
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
index b14d44e..34ce6f4 100644
--- a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
@@ -15,6 +15,8 @@
 package com.google.gwtexpui.safehtml.client;
 
 import com.google.gwt.core.client.GWT;
+import com.google.gwt.regexp.shared.MatchResult;
+import com.google.gwt.regexp.shared.RegExp;
 import com.google.gwt.user.client.DOM;
 import com.google.gwt.user.client.Element;
 import com.google.gwt.user.client.ui.HTML;
@@ -23,6 +25,8 @@
 import com.google.gwt.user.client.ui.InlineHTML;
 import com.google.gwt.user.client.ui.Widget;
 
+import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 /** Immutable string safely placed as HTML without further escaping. */
@@ -237,20 +241,59 @@
     return new SafeHtmlString(asString().replaceAll(regex, repl));
   }
 
+  private static class RegExpReplacement {
+    private final RegExp re;
+    private final String repl;
+
+    private RegExpReplacement(String pat, String repl) {
+      this.re = RegExp.compile(pat);
+      this.repl = repl;
+    }
+  }
+
   /**
-   * Go through the {@link RegexFindReplace} list, calling
-   * {@link #replaceAll(String,String)} on the HTML string for every
-   * find/replace pair in the list.
+   * Replace all find/replace pairs in the list in a single pass.
+   *
+   * @param findReplaceList find/replace pairs to use.
+   * @return a new string, after the replacements have been made.
    */
   public SafeHtml replaceAll(final List<RegexFindReplace> findReplaceList) {
     if (findReplaceList == null) {
       return this;
     }
-    String html = this.asString();
-    for (RegexFindReplace findReplace : findReplaceList) {
-      html = html.replaceAll(findReplace.find(), findReplace.replace());
+    List<RegExpReplacement> repls =
+        new ArrayList<RegExpReplacement>(findReplaceList.size());
+
+    StringBuilder pat = new StringBuilder();
+    Iterator<RegexFindReplace> it = findReplaceList.iterator();
+    while (it.hasNext()) {
+      RegexFindReplace fr = it.next();
+      repls.add(new RegExpReplacement(fr.find(), fr.replace()));
+      pat.append(fr.find());
+      if (it.hasNext()) {
+        pat.append('|');
+      }
     }
-    return new SafeHtmlString(html);
+
+    StringBuilder result = new StringBuilder();
+    RegExp re = RegExp.compile(pat.toString(), "g");
+    String orig = asString();
+    int index = 0;
+    MatchResult mat;
+    while ((mat = re.exec(orig)) != null) {
+      String g = mat.getGroup(0);
+      // Re-run each candidate to find which one matched.
+      for (RegExpReplacement repl : repls) {
+        if (repl.re.exec(g) != null) {
+          result.append(orig.substring(index, mat.getIndex()));
+          result.append(repl.re.replace(g, repl.repl));
+          index = mat.getIndex() + g.length();
+          break;
+        }
+      }
+    }
+    result.append(orig.substring(index, orig.length()));
+    return asis(result.toString());
   }
 
   /** @return a GWT block display widget displaying this HTML. */
diff --git a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
index 9cad57f..ec9d06f 100644
--- a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
+++ b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
@@ -17,40 +17,86 @@
 import junit.framework.TestCase;
 
 import java.util.Arrays;
+import java.util.List;
 
 public class SafeHtml_ReplaceTest extends TestCase {
-  public void testReplaceTwoLinks() {
-    final RegexFindReplace[] repl = {//
-        new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>") //
-        };
-    final SafeHtml o = html("A\nissue 42\nissue 9918\nB");
-    final SafeHtml n = o.replaceAll(Arrays.asList(repl));
+  public void testReplaceOneLink() {
+    SafeHtml o = html("A\nissue 42\nB");
+    SafeHtml n = o.replaceAll(repls(
+        new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
     assertNotSame(o, n);
-    assertEquals("A\n" //
-        + "<a href=\"?42\">issue 42</a>\n" //
-        + "<a href=\"?9918\">issue 9918</a>\n" //
-        + "B" //
+    assertEquals("A\n<a href=\"?42\">issue 42</a>\nB", n.asString());
+  }
+
+  public void testReplaceNoLeadingOrTrailingText() {
+    SafeHtml o = html("issue 42");
+    SafeHtml n = o.replaceAll(repls(
+        new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+    assertNotSame(o, n);
+    assertEquals("<a href=\"?42\">issue 42</a>", n.asString());
+  }
+
+  public void testReplaceTwoLinks() {
+    SafeHtml o = html("A\nissue 42\nissue 9918\nB");
+    SafeHtml n = o.replaceAll(repls(
+        new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+    assertNotSame(o, n);
+    assertEquals("A\n"
+        + "<a href=\"?42\">issue 42</a>\n"
+        + "<a href=\"?9918\">issue 9918</a>\n"
+        + "B"
     , n.asString());
   }
 
-  public void testReplaceInOrder1() {
-    final RegexFindReplace[] repl = {//
-            new RegexFindReplace("(GWTEXPUI-(\\d+))",
-                "<a href=\"gwtexpui-bug?$2\">$1</a>"), //
-            new RegexFindReplace("(issue\\s+(\\d+))",
-                "<a href=\"generic-bug?$2\">$1</a>"), //
-        };
-    final SafeHtml o = html("A\nissue 42\nReally GWTEXPUI-9918 is better\nB");
-    final SafeHtml n = o.replaceAll(Arrays.asList(repl));
+  public void testReplaceInOrder() {
+    SafeHtml o = html("A\nissue 42\nReally GWTEXPUI-9918 is better\nB");
+    SafeHtml n = o.replaceAll(repls(
+        new RegexFindReplace("(GWTEXPUI-(\\d+))",
+            "<a href=\"gwtexpui-bug?$2\">$1</a>"),
+        new RegexFindReplace("(issue\\s+(\\d+))",
+            "<a href=\"generic-bug?$2\">$1</a>")));
     assertNotSame(o, n);
-    assertEquals("A\n" //
-        + "<a href=\"generic-bug?42\">issue 42</a>\n" //
+    assertEquals("A\n"
+        + "<a href=\"generic-bug?42\">issue 42</a>\n"
         + "Really <a href=\"gwtexpui-bug?9918\">GWTEXPUI-9918</a> is better\n"
-        + "B" //
+        + "B"
     , n.asString());
   }
 
+  public void testReplaceOverlappingAfterFirstChar() {
+    SafeHtml o = html("abcd");
+    RegexFindReplace ab = new RegexFindReplace("ab", "AB");
+    RegexFindReplace bc = new RegexFindReplace("bc", "23");
+    RegexFindReplace cd = new RegexFindReplace("cd", "YZ");
+
+    assertEquals("ABcd", o.replaceAll(repls(ab, bc)).asString());
+    assertEquals("ABcd", o.replaceAll(repls(bc, ab)).asString());
+    assertEquals("ABYZ", o.replaceAll(repls(ab, bc, cd)).asString());
+  }
+
+  public void testReplaceOverlappingAtFirstCharLongestMatch() {
+    SafeHtml o = html("abcd");
+    RegexFindReplace ab = new RegexFindReplace("ab", "AB");
+    RegexFindReplace abc = new RegexFindReplace("[^d][^d][^d]", "234");
+
+    assertEquals("ABcd", o.replaceAll(repls(ab, abc)).asString());
+    assertEquals("234d", o.replaceAll(repls(abc, ab)).asString());
+  }
+
+  public void testReplaceOverlappingAtFirstCharFirstMatch() {
+    SafeHtml o = html("abcd");
+    RegexFindReplace ab1 = new RegexFindReplace("ab", "AB");
+    RegexFindReplace ab2 = new RegexFindReplace("[^cd][^cd]", "12");
+
+    assertEquals("ABcd", o.replaceAll(repls(ab1, ab2)).asString());
+    assertEquals("12cd", o.replaceAll(repls(ab2, ab1)).asString());
+  }
+
   private static SafeHtml html(String text) {
     return new SafeHtmlBuilder().append(text).toSafeHtml();
   }
+
+  private static List<RegexFindReplace> repls(RegexFindReplace... repls) {
+    return Arrays.asList(repls);
+  }
 }