Modify 'sanitizeMetricName' to avoid collisions
At present there is a simple 'DropWizardMetricMaker.sanitizeMetricName'
implementation that:
* replaces the leading `/` with `_`
* reduces the repeated `/` chars to a single `/`
* removes the ending `/`
* replaces not supported chars with `_`
As a result collision between the sanitized metric names can be easily
created e.g. `foo_bar` will collide with `foo+bar`.
In order to avoid collisions keep the rules about slashes (they are
needed anyway) and:
* replace not supported chars with `_0x[HEX CODE]_` string (code is
capitalized)
* the replacement prefix `0x` is prepended with another replacement
prefix
As a result:
`foo_bar` stays `foo_bar`
`foo+bar` becomes `foo_0x2B_bar`
`foo_0x2B_bar` becomes `foo_0x_0x2B_bar`
and collision is no longer possible
The replace algorithm was inspired by the work done in If11e6576eff
(kudos to Dani).
Bug: Issue 40015585
Release-Notes: Enhance metric name sanitize function to remove collision on '_' between metrics. Gerrit core metrics remain unchanged but plugins metrics could be affected.
Change-Id: I6a5366fa0f1fd494006e2234565371d279e8a7f3
diff --git a/Documentation/dev-plugins.txt b/Documentation/dev-plugins.txt
index ca72f8b..11f85dd 100644
--- a/Documentation/dev-plugins.txt
+++ b/Documentation/dev-plugins.txt
@@ -2342,6 +2342,25 @@
will cause the metrics to be recorded under `my-metrics/${metric-name}`.
+Note that metric name can be expressed through a limited set of characters
+`[a-zA-Z0-9_-]+(/[a-zA-Z0-9_-]+)*` therefore one should either ensure it
+or call
+link:https://gerrit.googlesource.com/gerrit/+/master/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java[
+sanitizeMetricName,role=external,window=_blank] to have it enforced.
+
+The
+link:https://gerrit.googlesource.com/gerrit/+/master/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java[
+sanitizeMetricName,role=external,window=_blank] performs the following
+modifications:
+
+* leading `/` is replaced with `_`
+* doubled (or repeated more times) `/` are reduced to a single `/`
+* ending `/` is removed
+* all characters that are not `/a-zA-Z0-9_-` are replaced with
+ `\_0x[HEX CODE]_` (code is capitalized)
+* if the replacement prefix (`_0x`) is found then it is prepended with another
+ replacement prefix
+
See the replication metrics in the
link:https://gerrit.googlesource.com/plugins/replication/+/master/src/main/java/com/googlesource/gerrit/plugins/replication/ReplicationMetrics.java[
replication plugin,role=external,window=_blank] for an example of usage.
diff --git a/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java b/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
index 32be18d..0821e86 100644
--- a/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
+++ b/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
@@ -60,6 +60,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -357,6 +358,8 @@
private static final Pattern METRIC_NAME_PATTERN =
Pattern.compile("[a-zA-Z0-9_-]+(/[a-zA-Z0-9_-]+)*");
+ private static final Pattern INVALID_CHAR_PATTERN = Pattern.compile("[^\\w-/]");
+ private static final String REPLACEMENT_PREFIX = "_0x";
private static void checkMetricName(String name) {
checkArgument(
@@ -366,24 +369,54 @@
METRIC_NAME_PATTERN.pattern());
}
+ /**
+ * Ensures that the sanitized metric name doesn't contain invalid characters and
+ * removes the risk of collision (between the sanitized metric names).
+ * Modifications to the input metric name:
+ * <ul>
+ * <li/> leading <code>/</code> is replaced with <code>_</code>
+ * <li/> doubled (or repeated more times) <code>/</code> are reduced to a single <code>/<code>
+ * <li/> ending <code>/</code> is removed
+ * <li/> all characters that are not <code>/a-zA-Z0-9_-</code> are replaced with <code>_0x[HEX CODE]_</code> (code is capitalized)
+ * <li/> the replacement prefix <code>_0x</code> is prepended with another replacement prefix
+ * </ul>
+ *
+ * @param name name of the metric to sanitize
+ * @return sanitized metric name
+ */
@Override
public String sanitizeMetricName(String name) {
- if (METRIC_NAME_PATTERN.matcher(name).matches()) {
+ if (METRIC_NAME_PATTERN.matcher(name).matches() && !name.contains(REPLACEMENT_PREFIX)) {
return name;
}
- String first = name.substring(0, 1).replaceFirst("[^\\w-]", "_");
+ StringBuilder sanitizedName =
+ new StringBuilder(name.substring(0, 1).replaceFirst("[^\\w-]", "_"));
if (name.length() == 1) {
- return first;
+ return sanitizedName.toString();
}
- String result = first + name.substring(1).replaceAll("/[/]+", "/").replaceAll("[^\\w-/]", "_");
-
- if (result.endsWith("/")) {
- result = result.substring(0, result.length() - 1);
+ String slashSanitizedName = name.substring(1).replaceAll("/[/]+", "/");
+ if (slashSanitizedName.endsWith("/")) {
+ slashSanitizedName = slashSanitizedName.substring(0, slashSanitizedName.length() - 1);
}
- return result;
+ String replacementPrefixSanitizedName =
+ slashSanitizedName.replaceAll(REPLACEMENT_PREFIX, REPLACEMENT_PREFIX + REPLACEMENT_PREFIX);
+
+ for (int i = 0; i < replacementPrefixSanitizedName.length(); i++) {
+ Character c = replacementPrefixSanitizedName.charAt(i);
+ Matcher matcher = INVALID_CHAR_PATTERN.matcher(c.toString());
+ if (matcher.matches()) {
+ sanitizedName.append(REPLACEMENT_PREFIX);
+ sanitizedName.append(Integer.toHexString(c).toUpperCase());
+ sanitizedName.append('_');
+ } else {
+ sanitizedName.append(c);
+ }
+ }
+
+ return sanitizedName.toString();
}
static String name(Description.FieldOrdering ordering, String codeName, String fieldValues) {
diff --git a/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java b/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
index 5777779..a50520ac 100644
--- a/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
+++ b/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
@@ -44,15 +44,33 @@
}
@Test
+ public void shouldNotSanitizeSafeName() throws Exception {
+ assertThat(metrics.sanitizeMetricName("metric/submetric1/submetric2/submetric3"))
+ .isEqualTo("metric/submetric1/submetric2/submetric3");
+ }
+
+ @Test
+ public void shouldNotCreateSimpleCollisions() throws Exception {
+ String sanitizedCase1 = metrics.sanitizeMetricName("foo_bar");
+ String sanitizedCase2 = metrics.sanitizeMetricName("foo+bar");
+ assertThat(sanitizedCase1).isNotEqualTo(sanitizedCase2);
+ }
+
+ @Test
+ public void shouldDoubleTheReplacementPrefix() throws Exception {
+ assertThat(metrics.sanitizeMetricName("foo_0x_bar")).isEqualTo("foo_0x_0x_bar");
+ }
+
+ @Test
public void shouldSanitizeUnwantedChars() throws Exception {
assertThat(metrics.sanitizeMetricName("very+confusing$long#metric@net/name^1"))
- .isEqualTo("very_confusing_long_metric_net/name_1");
+ .isEqualTo("very_0x2B_confusing_0x24_long_0x23_metric_0x40_net/name_0x5E_1");
assertThat(metrics.sanitizeMetricName("/metric/submetric")).isEqualTo("_metric/submetric");
}
@Test
public void shouldReduceConsecutiveSlashesToOne() throws Exception {
- assertThat(metrics.sanitizeMetricName("/metric//submetric1///submetric2/submetric3"))
+ assertThat(metrics.sanitizeMetricName("/metric///submetric1///submetric2/submetric3"))
.isEqualTo("_metric/submetric1/submetric2/submetric3");
}