Modify 'sanitizeMetricName' to avoid collisions

At present there is a simple 'DropWizardMetricMaker.sanitizeMetricName'
implementation that:
* replaces the leading `/` with `_`
* reduces the repeated `/` chars to a single `/`
* removes the ending `/`
* replaces not supported chars with `_`

As a result collision between the sanitized metric names can be easily
created e.g. `foo_bar` will collide with `foo+bar`.

In order to avoid collisions keep the rules about slashes (they are
needed anyway) and:
* replace not supported chars with `_0x[HEX CODE]_` string (code is
  capitalized)
* the replacement prefix `0x` is prepended with another replacement
  prefix

As a result:
`foo_bar` stays `foo_bar`
`foo+bar` becomes `foo_0x2B_bar`
`foo_0x2B_bar` becomes `foo_0x_0x2B_bar`
and collision is no longer possible

The replace algorithm was inspired by the work done in If11e6576eff
(kudos to Dani).

Bug: Issue 40015585
Release-Notes: Enhance metric name sanitize function to remove collision on '_' between metrics. Gerrit core metrics remain unchanged but plugins metrics could be affected.
Change-Id: I6a5366fa0f1fd494006e2234565371d279e8a7f3
diff --git a/Documentation/dev-plugins.txt b/Documentation/dev-plugins.txt
index ca72f8b..11f85dd 100644
--- a/Documentation/dev-plugins.txt
+++ b/Documentation/dev-plugins.txt
@@ -2342,6 +2342,25 @@
 
 will cause the metrics to be recorded under `my-metrics/${metric-name}`.
 
+Note that metric name can be expressed through a limited set of characters
+`[a-zA-Z0-9_-]+(/[a-zA-Z0-9_-]+)*` therefore one should either ensure it
+or call
+link:https://gerrit.googlesource.com/gerrit/+/master/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java[
+sanitizeMetricName,role=external,window=_blank] to have it enforced.
+
+The
+link:https://gerrit.googlesource.com/gerrit/+/master/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java[
+sanitizeMetricName,role=external,window=_blank] performs the following
+modifications:
+
+* leading `/` is replaced with `_`
+* doubled (or repeated more times) `/` are reduced to a single `/`
+* ending `/` is removed
+* all characters that are not `/a-zA-Z0-9_-` are replaced with
+  `\_0x[HEX CODE]_` (code is capitalized)
+* if the replacement prefix (`_0x`) is found then it is prepended with another
+  replacement prefix
+
 See the replication metrics in the
 link:https://gerrit.googlesource.com/plugins/replication/+/master/src/main/java/com/googlesource/gerrit/plugins/replication/ReplicationMetrics.java[
 replication plugin,role=external,window=_blank] for an example of usage.
diff --git a/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java b/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
index 32be18d..0821e86 100644
--- a/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
+++ b/java/com/google/gerrit/metrics/dropwizard/DropWizardMetricMaker.java
@@ -60,6 +60,7 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
@@ -357,6 +358,8 @@
 
   private static final Pattern METRIC_NAME_PATTERN =
       Pattern.compile("[a-zA-Z0-9_-]+(/[a-zA-Z0-9_-]+)*");
+  private static final Pattern INVALID_CHAR_PATTERN = Pattern.compile("[^\\w-/]");
+  private static final String REPLACEMENT_PREFIX = "_0x";
 
   private static void checkMetricName(String name) {
     checkArgument(
@@ -366,24 +369,54 @@
         METRIC_NAME_PATTERN.pattern());
   }
 
+  /**
+   * Ensures that the sanitized metric name doesn't contain invalid characters and
+   * removes the risk of collision (between the sanitized metric names).
+   * Modifications to the input metric name:
+   * <ul>
+   *   <li/> leading <code>/</code> is replaced with <code>_</code>
+   *   <li/> doubled (or repeated more times) <code>/</code> are reduced to a single <code>/<code>
+   *   <li/> ending <code>/</code> is removed
+   *   <li/> all characters that are not <code>/a-zA-Z0-9_-</code> are replaced with <code>_0x[HEX CODE]_</code> (code is capitalized)
+   *   <li/> the replacement prefix <code>_0x</code> is prepended with another replacement prefix
+   * </ul>
+   *
+   * @param name name of the metric to sanitize
+   * @return sanitized metric name
+   */
   @Override
   public String sanitizeMetricName(String name) {
-    if (METRIC_NAME_PATTERN.matcher(name).matches()) {
+    if (METRIC_NAME_PATTERN.matcher(name).matches() && !name.contains(REPLACEMENT_PREFIX)) {
       return name;
     }
 
-    String first = name.substring(0, 1).replaceFirst("[^\\w-]", "_");
+    StringBuilder sanitizedName =
+        new StringBuilder(name.substring(0, 1).replaceFirst("[^\\w-]", "_"));
     if (name.length() == 1) {
-      return first;
+      return sanitizedName.toString();
     }
 
-    String result = first + name.substring(1).replaceAll("/[/]+", "/").replaceAll("[^\\w-/]", "_");
-
-    if (result.endsWith("/")) {
-      result = result.substring(0, result.length() - 1);
+    String slashSanitizedName = name.substring(1).replaceAll("/[/]+", "/");
+    if (slashSanitizedName.endsWith("/")) {
+      slashSanitizedName = slashSanitizedName.substring(0, slashSanitizedName.length() - 1);
     }
 
-    return result;
+    String replacementPrefixSanitizedName =
+        slashSanitizedName.replaceAll(REPLACEMENT_PREFIX, REPLACEMENT_PREFIX + REPLACEMENT_PREFIX);
+
+    for (int i = 0; i < replacementPrefixSanitizedName.length(); i++) {
+      Character c = replacementPrefixSanitizedName.charAt(i);
+      Matcher matcher = INVALID_CHAR_PATTERN.matcher(c.toString());
+      if (matcher.matches()) {
+        sanitizedName.append(REPLACEMENT_PREFIX);
+        sanitizedName.append(Integer.toHexString(c).toUpperCase());
+        sanitizedName.append('_');
+      } else {
+        sanitizedName.append(c);
+      }
+    }
+
+    return sanitizedName.toString();
   }
 
   static String name(Description.FieldOrdering ordering, String codeName, String fieldValues) {
diff --git a/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java b/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
index 5777779..a50520ac 100644
--- a/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
+++ b/javatests/com/google/gerrit/metrics/dropwizard/DropWizardMetricMakerTest.java
@@ -44,15 +44,33 @@
   }
 
   @Test
+  public void shouldNotSanitizeSafeName() throws Exception {
+    assertThat(metrics.sanitizeMetricName("metric/submetric1/submetric2/submetric3"))
+        .isEqualTo("metric/submetric1/submetric2/submetric3");
+  }
+
+  @Test
+  public void shouldNotCreateSimpleCollisions() throws Exception {
+    String sanitizedCase1 = metrics.sanitizeMetricName("foo_bar");
+    String sanitizedCase2 = metrics.sanitizeMetricName("foo+bar");
+    assertThat(sanitizedCase1).isNotEqualTo(sanitizedCase2);
+  }
+
+  @Test
+  public void shouldDoubleTheReplacementPrefix() throws Exception {
+    assertThat(metrics.sanitizeMetricName("foo_0x_bar")).isEqualTo("foo_0x_0x_bar");
+  }
+
+  @Test
   public void shouldSanitizeUnwantedChars() throws Exception {
     assertThat(metrics.sanitizeMetricName("very+confusing$long#metric@net/name^1"))
-        .isEqualTo("very_confusing_long_metric_net/name_1");
+        .isEqualTo("very_0x2B_confusing_0x24_long_0x23_metric_0x40_net/name_0x5E_1");
     assertThat(metrics.sanitizeMetricName("/metric/submetric")).isEqualTo("_metric/submetric");
   }
 
   @Test
   public void shouldReduceConsecutiveSlashesToOne() throws Exception {
-    assertThat(metrics.sanitizeMetricName("/metric//submetric1///submetric2/submetric3"))
+    assertThat(metrics.sanitizeMetricName("/metric///submetric1///submetric2/submetric3"))
         .isEqualTo("_metric/submetric1/submetric2/submetric3");
   }