Replace a list of RegexFindReplaces in a single pass
Replacing in multiple passes, especially if each different
RegexFindReplaces come from different sources (e.g. users), can lead
to some confusing and likely dangerous behavior. Instead, replace in a
single pass by unioning patterns together and re-exec'ing the patterns
to see which one matched.
Change-Id: If218c9e47b3aca8ff665f1da844279aba3cbcc35
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
index b14d44e..34ce6f4 100644
--- a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
@@ -15,6 +15,8 @@
package com.google.gwtexpui.safehtml.client;
import com.google.gwt.core.client.GWT;
+import com.google.gwt.regexp.shared.MatchResult;
+import com.google.gwt.regexp.shared.RegExp;
import com.google.gwt.user.client.DOM;
import com.google.gwt.user.client.Element;
import com.google.gwt.user.client.ui.HTML;
@@ -23,6 +25,8 @@
import com.google.gwt.user.client.ui.InlineHTML;
import com.google.gwt.user.client.ui.Widget;
+import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
/** Immutable string safely placed as HTML without further escaping. */
@@ -237,20 +241,59 @@
return new SafeHtmlString(asString().replaceAll(regex, repl));
}
+ private static class RegExpReplacement {
+ private final RegExp re;
+ private final String repl;
+
+ private RegExpReplacement(String pat, String repl) {
+ this.re = RegExp.compile(pat);
+ this.repl = repl;
+ }
+ }
+
/**
- * Go through the {@link RegexFindReplace} list, calling
- * {@link #replaceAll(String,String)} on the HTML string for every
- * find/replace pair in the list.
+ * Replace all find/replace pairs in the list in a single pass.
+ *
+ * @param findReplaceList find/replace pairs to use.
+ * @return a new string, after the replacements have been made.
*/
public SafeHtml replaceAll(final List<RegexFindReplace> findReplaceList) {
if (findReplaceList == null) {
return this;
}
- String html = this.asString();
- for (RegexFindReplace findReplace : findReplaceList) {
- html = html.replaceAll(findReplace.find(), findReplace.replace());
+ List<RegExpReplacement> repls =
+ new ArrayList<RegExpReplacement>(findReplaceList.size());
+
+ StringBuilder pat = new StringBuilder();
+ Iterator<RegexFindReplace> it = findReplaceList.iterator();
+ while (it.hasNext()) {
+ RegexFindReplace fr = it.next();
+ repls.add(new RegExpReplacement(fr.find(), fr.replace()));
+ pat.append(fr.find());
+ if (it.hasNext()) {
+ pat.append('|');
+ }
}
- return new SafeHtmlString(html);
+
+ StringBuilder result = new StringBuilder();
+ RegExp re = RegExp.compile(pat.toString(), "g");
+ String orig = asString();
+ int index = 0;
+ MatchResult mat;
+ while ((mat = re.exec(orig)) != null) {
+ String g = mat.getGroup(0);
+ // Re-run each candidate to find which one matched.
+ for (RegExpReplacement repl : repls) {
+ if (repl.re.exec(g) != null) {
+ result.append(orig.substring(index, mat.getIndex()));
+ result.append(repl.re.replace(g, repl.repl));
+ index = mat.getIndex() + g.length();
+ break;
+ }
+ }
+ }
+ result.append(orig.substring(index, orig.length()));
+ return asis(result.toString());
}
/** @return a GWT block display widget displaying this HTML. */
diff --git a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
index 9cad57f..ec9d06f 100644
--- a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
+++ b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
@@ -17,40 +17,86 @@
import junit.framework.TestCase;
import java.util.Arrays;
+import java.util.List;
public class SafeHtml_ReplaceTest extends TestCase {
- public void testReplaceTwoLinks() {
- final RegexFindReplace[] repl = {//
- new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>") //
- };
- final SafeHtml o = html("A\nissue 42\nissue 9918\nB");
- final SafeHtml n = o.replaceAll(Arrays.asList(repl));
+ public void testReplaceOneLink() {
+ SafeHtml o = html("A\nissue 42\nB");
+ SafeHtml n = o.replaceAll(repls(
+ new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
assertNotSame(o, n);
- assertEquals("A\n" //
- + "<a href=\"?42\">issue 42</a>\n" //
- + "<a href=\"?9918\">issue 9918</a>\n" //
- + "B" //
+ assertEquals("A\n<a href=\"?42\">issue 42</a>\nB", n.asString());
+ }
+
+ public void testReplaceNoLeadingOrTrailingText() {
+ SafeHtml o = html("issue 42");
+ SafeHtml n = o.replaceAll(repls(
+ new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+ assertNotSame(o, n);
+ assertEquals("<a href=\"?42\">issue 42</a>", n.asString());
+ }
+
+ public void testReplaceTwoLinks() {
+ SafeHtml o = html("A\nissue 42\nissue 9918\nB");
+ SafeHtml n = o.replaceAll(repls(
+ new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+ assertNotSame(o, n);
+ assertEquals("A\n"
+ + "<a href=\"?42\">issue 42</a>\n"
+ + "<a href=\"?9918\">issue 9918</a>\n"
+ + "B"
, n.asString());
}
- public void testReplaceInOrder1() {
- final RegexFindReplace[] repl = {//
- new RegexFindReplace("(GWTEXPUI-(\\d+))",
- "<a href=\"gwtexpui-bug?$2\">$1</a>"), //
- new RegexFindReplace("(issue\\s+(\\d+))",
- "<a href=\"generic-bug?$2\">$1</a>"), //
- };
- final SafeHtml o = html("A\nissue 42\nReally GWTEXPUI-9918 is better\nB");
- final SafeHtml n = o.replaceAll(Arrays.asList(repl));
+ public void testReplaceInOrder() {
+ SafeHtml o = html("A\nissue 42\nReally GWTEXPUI-9918 is better\nB");
+ SafeHtml n = o.replaceAll(repls(
+ new RegexFindReplace("(GWTEXPUI-(\\d+))",
+ "<a href=\"gwtexpui-bug?$2\">$1</a>"),
+ new RegexFindReplace("(issue\\s+(\\d+))",
+ "<a href=\"generic-bug?$2\">$1</a>")));
assertNotSame(o, n);
- assertEquals("A\n" //
- + "<a href=\"generic-bug?42\">issue 42</a>\n" //
+ assertEquals("A\n"
+ + "<a href=\"generic-bug?42\">issue 42</a>\n"
+ "Really <a href=\"gwtexpui-bug?9918\">GWTEXPUI-9918</a> is better\n"
- + "B" //
+ + "B"
, n.asString());
}
+ public void testReplaceOverlappingAfterFirstChar() {
+ SafeHtml o = html("abcd");
+ RegexFindReplace ab = new RegexFindReplace("ab", "AB");
+ RegexFindReplace bc = new RegexFindReplace("bc", "23");
+ RegexFindReplace cd = new RegexFindReplace("cd", "YZ");
+
+ assertEquals("ABcd", o.replaceAll(repls(ab, bc)).asString());
+ assertEquals("ABcd", o.replaceAll(repls(bc, ab)).asString());
+ assertEquals("ABYZ", o.replaceAll(repls(ab, bc, cd)).asString());
+ }
+
+ public void testReplaceOverlappingAtFirstCharLongestMatch() {
+ SafeHtml o = html("abcd");
+ RegexFindReplace ab = new RegexFindReplace("ab", "AB");
+ RegexFindReplace abc = new RegexFindReplace("[^d][^d][^d]", "234");
+
+ assertEquals("ABcd", o.replaceAll(repls(ab, abc)).asString());
+ assertEquals("234d", o.replaceAll(repls(abc, ab)).asString());
+ }
+
+ public void testReplaceOverlappingAtFirstCharFirstMatch() {
+ SafeHtml o = html("abcd");
+ RegexFindReplace ab1 = new RegexFindReplace("ab", "AB");
+ RegexFindReplace ab2 = new RegexFindReplace("[^cd][^cd]", "12");
+
+ assertEquals("ABcd", o.replaceAll(repls(ab1, ab2)).asString());
+ assertEquals("12cd", o.replaceAll(repls(ab2, ab1)).asString());
+ }
+
private static SafeHtml html(String text) {
return new SafeHtmlBuilder().append(text).toSafeHtml();
}
+
+ private static List<RegexFindReplace> repls(RegexFindReplace... repls) {
+ return Arrays.asList(repls);
+ }
}