PerformanceMetrics: Log per request latency for configured operations

PerformanceMetrics has 2 metrics, one to record the latency of the
configured operations and one to count how often the configured
operations are invoked in total. To be able to estimate how much a
latency improvement improves the latency for a request, we add a third
metric that records the total latency of calling the configured
operations during one request. This way we can know how much a certain
operation contributes to the request latency in total, and how much a
latency improvement speeds up the request.

E.g. at Google we migrate the accounts from NoteDb into another storage
and we hope this will speed up the "Loading account" operations which
can be called many times during a request. Having the new metric allows
us to see how much the latency of the different requests is improved by
this migration. Without the new metric we would only know how the
latency of the "Loading account" operation is improved, but not which
effect this had on the request latencies.

To know the total latency of the configured operations we sum up the
latency per operation in memory when the logNanos methods are invoked.
Then we use the total latency to record the new metric in a new
PerformanceLogger#done() method that is invoked after all logs have been
written. To be able to cache the total latency in memory the
PerformanceMetrics PerformanceLogger can no longer be a singleton, but
we need a new instance for every request.

Release-Notes: skip
Change-Id: I42fc1ab6328723fa05b0ed04979eef75f19f8d51
Signed-off-by: Edwin Kempin <ekempin@google.com>
diff --git a/java/com/google/gerrit/server/PerformanceMetrics.java b/java/com/google/gerrit/server/PerformanceMetrics.java
index 3d21fed..b894d49 100644
--- a/java/com/google/gerrit/server/PerformanceMetrics.java
+++ b/java/com/google/gerrit/server/PerformanceMetrics.java
@@ -14,6 +14,7 @@
 
 package com.google.gerrit.server;
 
+import com.google.auto.value.AutoValue;
 import com.google.common.collect.ImmutableList;
 import com.google.gerrit.common.Nullable;
 import com.google.gerrit.metrics.Counter3;
@@ -26,20 +27,23 @@
 import com.google.gerrit.server.logging.PerformanceLogger;
 import com.google.gerrit.server.logging.TraceContext;
 import com.google.inject.Inject;
-import com.google.inject.Singleton;
 import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.eclipse.jgit.lib.Config;
 
 /** Performance logger that records the execution times as a metric. */
-@Singleton
 public class PerformanceMetrics implements PerformanceLogger {
   private static final String OPERATION_LATENCY_METRIC_NAME = "performance/operations";
   private static final String OPERATION_COUNT_METRIC_NAME = "performance/operations_count";
 
   private final ImmutableList<String> tracedOperations;
-  public final Timer3<String, String, String> operationsLatency;
-  public final Counter3<String, String, String> operationsCounter;
+  private final Timer3<String, String, String> operationsLatency;
+  private final Counter3<String, String, String> operationsCounter;
+  private final Timer3<String, String, String> operationsRequestLatency;
+
+  private final Map<MetricKey, Long> perRequestLatencyNanos = new HashMap<>();
 
   @Inject
   PerformanceMetrics(@GerritServerConfig Config cfg, MetricMaker metricMaker) {
@@ -82,6 +86,17 @@
             operationNameField,
             requestField,
             pluginField);
+    this.operationsRequestLatency =
+        metricMaker
+            .newTimer(
+                OPERATION_LATENCY_METRIC_NAME,
+                new Description("Per request latency of performing operations")
+                    .setCumulative()
+                    .setUnit(Description.Units.MILLISECONDS),
+                operationNameField,
+                requestField,
+                pluginField)
+            .suppressLogging();
   }
 
   @Override
@@ -100,5 +115,36 @@
     String pluginTag = TraceContext.getPluginTag().orElse("");
     operationsLatency.record(operation, requestTag, pluginTag, durationNanos, TimeUnit.NANOSECONDS);
     operationsCounter.increment(operation, requestTag, pluginTag);
+
+    perRequestLatencyNanos.compute(
+        MetricKey.create(operation, requestTag, pluginTag),
+        (metricKey, latencyNanos) ->
+            (latencyNanos == null) ? durationNanos : latencyNanos + durationNanos);
+  }
+
+  @Override
+  public void done() {
+    perRequestLatencyNanos.forEach(
+        (metricKey, latencyNanos) ->
+            operationsRequestLatency.record(
+                metricKey.operation(),
+                metricKey.requestTag(),
+                metricKey.pluginTag(),
+                latencyNanos,
+                TimeUnit.NANOSECONDS));
+    perRequestLatencyNanos.clear();
+  }
+
+  @AutoValue
+  abstract static class MetricKey {
+    abstract String operation();
+
+    abstract String requestTag();
+
+    abstract String pluginTag();
+
+    public static MetricKey create(String operation, String requestTag, String pluginTag) {
+      return new AutoValue_PerformanceMetrics_MetricKey(operation, requestTag, pluginTag);
+    }
   }
 }
diff --git a/java/com/google/gerrit/server/logging/PerformanceLogContext.java b/java/com/google/gerrit/server/logging/PerformanceLogContext.java
index 90e716f..8cf5b84 100644
--- a/java/com/google/gerrit/server/logging/PerformanceLogContext.java
+++ b/java/com/google/gerrit/server/logging/PerformanceLogContext.java
@@ -92,6 +92,7 @@
             p -> {
               try (TraceContext traceContext = newPluginTrace(p)) {
                 performanceLogRecords.forEach(r -> r.writeTo(p.get()));
+                p.get().done();
               } catch (RuntimeException e) {
                 logger.atWarning().withCause(e).log(
                     "Failure in %s of plugin %s", p.get().getClass(), p.getPluginName());
diff --git a/java/com/google/gerrit/server/logging/PerformanceLogger.java b/java/com/google/gerrit/server/logging/PerformanceLogger.java
index fa61eb4..02fb37c 100644
--- a/java/com/google/gerrit/server/logging/PerformanceLogger.java
+++ b/java/com/google/gerrit/server/logging/PerformanceLogger.java
@@ -48,4 +48,10 @@
    * @param metadata metadata
    */
   void logNanos(String operation, long durationNanos, Instant endTime, Metadata metadata);
+
+  /**
+   * Called after all performance events of a request have been logged via {@link #logNanos(String,
+   * long, Instant)} or {@link #logNanos(String, long, Instant, Metadata)}.
+   */
+  default void done() {}
 }