Implement safe find/replace for user-provided links
Extract an interface out of RegexFindReplace for replacement with
arbitrary types of sanitization or other munging. Implement a
LinkFindReplace that knows how to sanitize links with SafeHtmlBuilder.
Push the replacement into the FindReplace interface so implementations
can do sanitization _after_ regular expression replacement.
Change-Id: I46383abd1921a11b4c90b80b2022051fc0c8e13c
diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
index 82eb859..f9fe345 100644
--- a/.settings/org.eclipse.core.resources.prefs
+++ b/.settings/org.eclipse.core.resources.prefs
@@ -1,3 +1,4 @@
-#Tue Sep 02 16:59:24 PDT 2008
eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/test/java=UTF-8
encoding/<project>=UTF-8
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
index b0b2fe2..3473ea8 100644
--- a/.settings/org.eclipse.jdt.core.prefs
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -1,4 +1,3 @@
-#Wed May 13 08:49:53 PDT 2009
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
@@ -9,6 +8,8 @@
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.processAnnotations=disabled
org.eclipse.jdt.core.compiler.source=1.6
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/FindReplace.java b/src/main/java/com/google/gwtexpui/safehtml/client/FindReplace.java
new file mode 100644
index 0000000..f7bc907
--- /dev/null
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/FindReplace.java
@@ -0,0 +1,40 @@
+// Copyright (C) 2013 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtexpui.safehtml.client;
+
+import com.google.gwt.regexp.shared.RegExp;
+
+/** A Find/Replace pair used against the {@link SafeHtml} block of text. */
+public interface FindReplace {
+ /**
+ * @return regular expression to match substrings with; should be treated as
+ * immutable.
+ */
+ public RegExp pattern();
+
+ /**
+ * Find and replace a single instance of this pattern in an input.
+ * <p>
+ * <b>WARNING:</b> No XSS sanitization is done on the return value of this
+ * method, e.g. this value may be passed directly to
+ * {@link SafeHtml#replaceAll(String, String)}. Implementations must sanitize output
+ * appropriately.
+ *
+ * @param input input string.
+ * @return result of regular expression replacement.
+ * @throws IllegalArgumentException if the input could not be safely sanitized.
+ */
+ public String replace(String input);
+}
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/LinkFindReplace.java b/src/main/java/com/google/gwtexpui/safehtml/client/LinkFindReplace.java
new file mode 100644
index 0000000..eaa4f23
--- /dev/null
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/LinkFindReplace.java
@@ -0,0 +1,84 @@
+// Copyright (C) 2013 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtexpui.safehtml.client;
+
+import com.google.gwt.regexp.shared.RegExp;
+
+/**
+ * A Find/Replace pair whose replacement string is a link.
+ * <p>
+ * It is safe to pass arbitrary user-provided links to this class. Links are
+ * sanitized as follows:
+ * <ul>
+ * <li>Only http(s) and mailto links are supported; any other scheme results in
+ * an {@link IllegalArgumentException} from {@link #replace(String)}.
+ * <li>Special characters in the link after regex replacement are escaped with
+ * {@link SafeHtmlBuilder}.</li>
+ * </ul>
+ */
+public class LinkFindReplace implements FindReplace {
+ public static boolean hasValidScheme(String link) {
+ int colon = link.indexOf(':');
+ if (colon < 0) {
+ return true;
+ }
+ String scheme = link.substring(0, colon);
+ return "http".equalsIgnoreCase(scheme)
+ || "https".equalsIgnoreCase(scheme)
+ || "mailto".equalsIgnoreCase(scheme);
+ }
+
+ private RegExp pat;
+ private String link;
+
+ protected LinkFindReplace() {
+ }
+
+ /**
+ * @param regex regular expression pattern to match substrings with.
+ * @param repl replacement link href. Capture groups within
+ * <code>regex</code> can be referenced with <code>$<i>n</i></code>.
+ */
+ public LinkFindReplace(String find, String link) {
+ this.pat = RegExp.compile(find);
+ this.link = link;
+ }
+
+ @Override
+ public RegExp pattern() {
+ return pat;
+ }
+
+ @Override
+ public String replace(String input) {
+ String href = pat.replace(input, link);
+ if (!hasValidScheme(href)) {
+ throw new IllegalArgumentException(
+ "Invalid scheme (" + toString() + "): " + href);
+ }
+ String result = new SafeHtmlBuilder()
+ .openAnchor()
+ .setAttribute("href", href)
+ .append(SafeHtml.asis(input))
+ .closeAnchor()
+ .asString();
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return "find = " + pat.getSource() + ", link = " + link;
+ }
+}
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/RawFindReplace.java b/src/main/java/com/google/gwtexpui/safehtml/client/RawFindReplace.java
new file mode 100644
index 0000000..d22fef6
--- /dev/null
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/RawFindReplace.java
@@ -0,0 +1,55 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtexpui.safehtml.client;
+
+import com.google.gwt.regexp.shared.RegExp;
+
+/**
+ * A Find/Replace pair whose replacement string is arbitrary HTML.
+ * <p>
+ * <b>WARNING:</b> This class is not safe used with user-provided patterns.
+ */
+public class RawFindReplace implements FindReplace {
+ private RegExp pat;
+ private String replace;
+
+ protected RawFindReplace() {
+ }
+
+ /**
+ * @param regex regular expression pattern to match substrings with.
+ * @param repl replacement expression. Capture groups within
+ * <code>regex</code> can be referenced with <code>$<i>n</i></code>.
+ */
+ public RawFindReplace(String find, String replace) {
+ this.pat = RegExp.compile(find);
+ this.replace = replace;
+ }
+
+ @Override
+ public RegExp pattern() {
+ return pat;
+ }
+
+ @Override
+ public String replace(String input) {
+ return pat.replace(input, replace);
+ }
+
+ @Override
+ public String toString() {
+ return "find = " + pat.getSource() + ", replace = " + replace;
+ }
+}
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/RegexFindReplace.java b/src/main/java/com/google/gwtexpui/safehtml/client/RegexFindReplace.java
deleted file mode 100644
index fe272d4..0000000
--- a/src/main/java/com/google/gwtexpui/safehtml/client/RegexFindReplace.java
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (C) 2009 The Android Open Source Project
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.gwtexpui.safehtml.client;
-
-/** A Find/Replace pair used against the SafeHtml block of text */
-public class RegexFindReplace {
- private String find;
- private String replace;
-
- protected RegexFindReplace() {
- }
-
- public RegexFindReplace(final String find, final String replace) {
- this.find = find;
- this.replace = replace;
- }
-
- public String find() {
- return find;
- }
-
- public String replace() {
- return replace;
- }
-
- @Override
- public String toString() {
- return "find = " + find + ", replace = " + replace;
- }
-}
diff --git a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
index 34ce6f4..5794386 100644
--- a/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
+++ b/src/main/java/com/google/gwtexpui/safehtml/client/SafeHtml.java
@@ -25,7 +25,6 @@
import com.google.gwt.user.client.ui.InlineHTML;
import com.google.gwt.user.client.ui.Widget;
-import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -115,9 +114,9 @@
/** Convert bare http:// and https:// URLs into <a href> tags. */
public SafeHtml linkify() {
final String part = "(?:" +
- "[a-zA-Z0-9$_.+!*',%;:@=?#/~-]" +
- "|&(?!lt;|gt;)" +
- ")";
+ "[a-zA-Z0-9$_.+!*',%;:@=?#/~-]" +
+ "|&(?!lt;|gt;)" +
+ ")";
return replaceAll(
"(https?://" +
part + "{2,}" +
@@ -241,35 +240,22 @@
return new SafeHtmlString(asString().replaceAll(regex, repl));
}
- private static class RegExpReplacement {
- private final RegExp re;
- private final String repl;
-
- private RegExpReplacement(String pat, String repl) {
- this.re = RegExp.compile(pat);
- this.repl = repl;
- }
- }
-
/**
* Replace all find/replace pairs in the list in a single pass.
*
* @param findReplaceList find/replace pairs to use.
* @return a new string, after the replacements have been made.
*/
- public SafeHtml replaceAll(final List<RegexFindReplace> findReplaceList) {
+ public <T> SafeHtml replaceAll(List<? extends FindReplace> findReplaceList) {
if (findReplaceList == null) {
return this;
}
- List<RegExpReplacement> repls =
- new ArrayList<RegExpReplacement>(findReplaceList.size());
StringBuilder pat = new StringBuilder();
- Iterator<RegexFindReplace> it = findReplaceList.iterator();
+ Iterator<? extends FindReplace> it = findReplaceList.iterator();
while (it.hasNext()) {
- RegexFindReplace fr = it.next();
- repls.add(new RegExpReplacement(fr.find(), fr.replace()));
- pat.append(fr.find());
+ FindReplace fr = it.next();
+ pat.append(fr.pattern().getSource());
if (it.hasNext()) {
pat.append('|');
}
@@ -283,10 +269,15 @@
while ((mat = re.exec(orig)) != null) {
String g = mat.getGroup(0);
// Re-run each candidate to find which one matched.
- for (RegExpReplacement repl : repls) {
- if (repl.re.exec(g) != null) {
- result.append(orig.substring(index, mat.getIndex()));
- result.append(repl.re.replace(g, repl.repl));
+ for (FindReplace fr : findReplaceList) {
+ if (fr.pattern().test(g)) {
+ try {
+ String repl = fr.replace(g);
+ result.append(orig.substring(index, mat.getIndex()));
+ result.append(repl);
+ } catch (IllegalArgumentException e) {
+ continue;
+ }
index = mat.getIndex() + g.length();
break;
}
diff --git a/src/test/java/com/google/gwtexpui/safehtml/client/LinkFindReplaceTest.java b/src/test/java/com/google/gwtexpui/safehtml/client/LinkFindReplaceTest.java
new file mode 100644
index 0000000..97f816f
--- /dev/null
+++ b/src/test/java/com/google/gwtexpui/safehtml/client/LinkFindReplaceTest.java
@@ -0,0 +1,77 @@
+// Copyright (C) 2013 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gwtexpui.safehtml.client;
+
+import static com.google.gwtexpui.safehtml.client.LinkFindReplace.hasValidScheme;
+
+import junit.framework.TestCase;
+
+public class LinkFindReplaceTest extends TestCase {
+ public void testNoEscaping() {
+ String find = "find";
+ String link = "link";
+ LinkFindReplace a = new LinkFindReplace(find, link);
+ assertEquals(find, a.pattern().getSource());
+ assertEquals("<a href=\"link\">find</a>", a.replace(find));
+ assertEquals("find = " + find + ", link = " + link, a.toString());
+ }
+
+ public void testBackreference() {
+ assertEquals("<a href=\"/bug?id=123\">issue 123</a>",
+ new LinkFindReplace("(bug|issue)\\s*([0-9]+)", "/bug?id=$2")
+ .replace("issue 123"));
+ }
+
+ public void testHasValidScheme() {
+ assertTrue(hasValidScheme("/absolute/path"));
+ assertTrue(hasValidScheme("relative/path"));
+ assertTrue(hasValidScheme("http://url/"));
+ assertTrue(hasValidScheme("HTTP://url/"));
+ assertTrue(hasValidScheme("https://url/"));
+ assertTrue(hasValidScheme("mailto://url/"));
+ assertFalse(hasValidScheme("ftp://url/"));
+ assertFalse(hasValidScheme("data:evil"));
+ assertFalse(hasValidScheme("javascript:alert(1)"));
+ }
+
+ public void testInvalidSchemeInReplace() {
+ try {
+ new LinkFindReplace("find", "javascript:alert(1)").replace("find");
+ fail("Expected IllegalStateException");
+ } catch (IllegalArgumentException expected) {
+ }
+ }
+
+ public void testInvalidSchemeWithBackreference() {
+ try {
+ new LinkFindReplace(".*(script:[^;]*)", "java$1")
+ .replace("Look at this script: alert(1);");
+ fail("Expected IllegalStateException");
+ } catch (IllegalArgumentException expected) {
+ }
+ }
+
+ public void testReplaceEscaping() {
+ assertEquals("<a href=\"a"&'<>b\">find</a>",
+ new LinkFindReplace("find", "a\"&'<>b").replace("find"));
+ }
+
+ public void testHtmlInFind() {
+ String rawFind = "<b>"bold"</b>";
+ LinkFindReplace a = new LinkFindReplace(rawFind, "/bold");
+ assertEquals(rawFind, a.pattern().getSource());
+ assertEquals("<a href=\"/bold\">" + rawFind + "</a>", a.replace(rawFind));
+ }
+}
diff --git a/src/test/java/com/google/gwtexpui/safehtml/client/RegexFindReplaceTest.java b/src/test/java/com/google/gwtexpui/safehtml/client/RawFindReplaceTest.java
similarity index 77%
rename from src/test/java/com/google/gwtexpui/safehtml/client/RegexFindReplaceTest.java
rename to src/test/java/com/google/gwtexpui/safehtml/client/RawFindReplaceTest.java
index a11a7ec..9c450bd 100644
--- a/src/test/java/com/google/gwtexpui/safehtml/client/RegexFindReplaceTest.java
+++ b/src/test/java/com/google/gwtexpui/safehtml/client/RawFindReplaceTest.java
@@ -16,13 +16,13 @@
import junit.framework.TestCase;
-public class RegexFindReplaceTest extends TestCase {
- public void testCreate() {
+public class RawFindReplaceTest extends TestCase {
+ public void testFindReplace() {
final String find = "find";
final String replace = "replace";
- final RegexFindReplace a = new RegexFindReplace(find, replace);
- assertSame(find, a.find());
- assertSame(replace, a.replace());
+ final RawFindReplace a = new RawFindReplace(find, replace);
+ assertEquals(find, a.pattern().getSource());
+ assertEquals(replace, a.replace(find));
assertEquals("find = " + find + ", replace = " + replace, a.toString());
}
}
diff --git a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
index ec9d06f..e8c14d7 100644
--- a/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
+++ b/src/test/java/com/google/gwtexpui/safehtml/client/SafeHtml_ReplaceTest.java
@@ -23,7 +23,7 @@
public void testReplaceOneLink() {
SafeHtml o = html("A\nissue 42\nB");
SafeHtml n = o.replaceAll(repls(
- new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+ new RawFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
assertNotSame(o, n);
assertEquals("A\n<a href=\"?42\">issue 42</a>\nB", n.asString());
}
@@ -31,7 +31,7 @@
public void testReplaceNoLeadingOrTrailingText() {
SafeHtml o = html("issue 42");
SafeHtml n = o.replaceAll(repls(
- new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+ new RawFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
assertNotSame(o, n);
assertEquals("<a href=\"?42\">issue 42</a>", n.asString());
}
@@ -39,7 +39,7 @@
public void testReplaceTwoLinks() {
SafeHtml o = html("A\nissue 42\nissue 9918\nB");
SafeHtml n = o.replaceAll(repls(
- new RegexFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
+ new RawFindReplace("(issue\\s(\\d+))", "<a href=\"?$2\">$1</a>")));
assertNotSame(o, n);
assertEquals("A\n"
+ "<a href=\"?42\">issue 42</a>\n"
@@ -51,9 +51,9 @@
public void testReplaceInOrder() {
SafeHtml o = html("A\nissue 42\nReally GWTEXPUI-9918 is better\nB");
SafeHtml n = o.replaceAll(repls(
- new RegexFindReplace("(GWTEXPUI-(\\d+))",
+ new RawFindReplace("(GWTEXPUI-(\\d+))",
"<a href=\"gwtexpui-bug?$2\">$1</a>"),
- new RegexFindReplace("(issue\\s+(\\d+))",
+ new RawFindReplace("(issue\\s+(\\d+))",
"<a href=\"generic-bug?$2\">$1</a>")));
assertNotSame(o, n);
assertEquals("A\n"
@@ -65,9 +65,9 @@
public void testReplaceOverlappingAfterFirstChar() {
SafeHtml o = html("abcd");
- RegexFindReplace ab = new RegexFindReplace("ab", "AB");
- RegexFindReplace bc = new RegexFindReplace("bc", "23");
- RegexFindReplace cd = new RegexFindReplace("cd", "YZ");
+ RawFindReplace ab = new RawFindReplace("ab", "AB");
+ RawFindReplace bc = new RawFindReplace("bc", "23");
+ RawFindReplace cd = new RawFindReplace("cd", "YZ");
assertEquals("ABcd", o.replaceAll(repls(ab, bc)).asString());
assertEquals("ABcd", o.replaceAll(repls(bc, ab)).asString());
@@ -76,8 +76,8 @@
public void testReplaceOverlappingAtFirstCharLongestMatch() {
SafeHtml o = html("abcd");
- RegexFindReplace ab = new RegexFindReplace("ab", "AB");
- RegexFindReplace abc = new RegexFindReplace("[^d][^d][^d]", "234");
+ RawFindReplace ab = new RawFindReplace("ab", "AB");
+ RawFindReplace abc = new RawFindReplace("[^d][^d][^d]", "234");
assertEquals("ABcd", o.replaceAll(repls(ab, abc)).asString());
assertEquals("234d", o.replaceAll(repls(abc, ab)).asString());
@@ -85,18 +85,28 @@
public void testReplaceOverlappingAtFirstCharFirstMatch() {
SafeHtml o = html("abcd");
- RegexFindReplace ab1 = new RegexFindReplace("ab", "AB");
- RegexFindReplace ab2 = new RegexFindReplace("[^cd][^cd]", "12");
+ RawFindReplace ab1 = new RawFindReplace("ab", "AB");
+ RawFindReplace ab2 = new RawFindReplace("[^cd][^cd]", "12");
assertEquals("ABcd", o.replaceAll(repls(ab1, ab2)).asString());
assertEquals("12cd", o.replaceAll(repls(ab2, ab1)).asString());
}
+ public void testFailedSanitization() {
+ SafeHtml o = html("abcd");
+ LinkFindReplace evil = new LinkFindReplace("(b)", "javascript:alert('$1')");
+ LinkFindReplace ok = new LinkFindReplace("(b)", "/$1");
+ assertEquals("abcd", o.replaceAll(repls(evil)).asString());
+ String linked = "a<a href=\"/b\">b</a>cd";
+ assertEquals(linked, o.replaceAll(repls(ok)).asString());
+ assertEquals(linked, o.replaceAll(repls(evil, ok)).asString());
+ }
+
private static SafeHtml html(String text) {
return new SafeHtmlBuilder().append(text).toSafeHtml();
}
- private static List<RegexFindReplace> repls(RegexFindReplace... repls) {
+ private static List<FindReplace> repls(FindReplace... repls) {
return Arrays.asList(repls);
}
}