Short-circuit on failed healthchecks

Before the full set of checks was always executed even if it was clear
that it would have failed because of one of the checks failing.

Avoid running the additional checks when one of the checks is failing
for any reason and therefore running the full set would not be useful.

Introduce the new status NOT_RUN that clarifies that the check was
not executed because it would have not change the overall status.

This change allows a Gerrit to avoid running extra checks when the
system is already struggling. Having too may healthchecks could
actually prevent the system to become healthy again.
Minimizing the execution of checks on a struggling system is paramount
for allowing it to recover more quickly.

Change-Id: I885b3df06b60e322f12b77674be02536e8131499
diff --git a/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/GlobalHealthCheck.java b/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/GlobalHealthCheck.java
index 773b6c0..4e4cef7 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/GlobalHealthCheck.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/GlobalHealthCheck.java
@@ -22,8 +22,11 @@
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import com.googlesource.gerrit.plugins.healthcheck.HealthCheckConfig;
+import java.util.Collections;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
@@ -32,6 +35,31 @@
 
   private final DynamicSet<HealthCheck> healthChecks;
 
+  public static class MemoizedStatusSummary implements Supplier<StatusSummary> {
+    private final AtomicReference<StatusSummary> result = new AtomicReference<>();
+    private final HealthCheck check;
+
+    MemoizedStatusSummary(HealthCheck check) {
+      this.check = check;
+    }
+
+    @Override
+    public StatusSummary get() {
+      if (result.get() == null) {
+        result.set(check.run());
+      }
+      return result.get();
+    }
+
+    public StatusSummary getIfCompleted() {
+      StatusSummary completedResult = result.get();
+      return completedResult == null
+          ? new StatusSummary(
+              Result.NOT_RUN, System.currentTimeMillis(), 0L, Collections.emptyMap())
+          : completedResult;
+    }
+  }
+
   @Inject
   public GlobalHealthCheck(
       DynamicSet<HealthCheck> healthChecks,
@@ -46,16 +74,16 @@
   public HealthCheck.StatusSummary run() {
     Iterable<HealthCheck> iterable = () -> healthChecks.iterator();
     long ts = System.currentTimeMillis();
-    Map<String, Object> checkToResults =
-        StreamSupport.stream(iterable.spliterator(), true)
-            .collect(Collectors.toMap(HealthCheck::name, HealthCheck::run));
+    Map<String, MemoizedStatusSummary> checkToResults =
+        StreamSupport.stream(iterable.spliterator(), false)
+            .collect(Collectors.toMap(HealthCheck::name, MemoizedStatusSummary::new));
     long elapsed = System.currentTimeMillis() - ts;
+    Result checkResult = hasAnyFailureOnResults(checkToResults) ? Result.FAILED : Result.PASSED;
+    Map<String, Object> reportedResults =
+        checkToResults.entrySet().stream()
+            .collect(Collectors.toMap(Map.Entry::getKey, v -> v.getValue().getIfCompleted()));
     StatusSummary globalStatus =
-        new HealthCheck.StatusSummary(
-            hasAnyFailureOnResults(checkToResults) ? Result.FAILED : Result.PASSED,
-            ts,
-            elapsed,
-            checkToResults);
+        new HealthCheck.StatusSummary(checkResult, ts, elapsed, reportedResults);
     if (globalStatus.isFailure()) {
       failureCounterMetric.increment();
     }
@@ -68,8 +96,7 @@
     return run().result;
   }
 
-  public static boolean hasAnyFailureOnResults(Map<String, Object> results) {
-    return results.values().stream()
-        .anyMatch(res -> res instanceof StatusSummary && ((StatusSummary) res).isFailure());
+  public static boolean hasAnyFailureOnResults(Map<String, MemoizedStatusSummary> results) {
+    return results.values().stream().parallel().anyMatch(res -> res.get().isFailure());
   }
 }
diff --git a/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/HealthCheck.java b/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/HealthCheck.java
index 7c4f8a1..db07320 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/HealthCheck.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/healthcheck/check/HealthCheck.java
@@ -30,6 +30,8 @@
     FAILED,
     @SerializedName("timeout")
     TIMEOUT,
+    @SerializedName("not_run")
+    NOT_RUN,
     @SerializedName("disabled")
     DISABLED;
   }
diff --git a/src/test/java/com/googlesource/gerrit/plugins/healthcheck/AbstractHealthCheckIntegrationTest.java b/src/test/java/com/googlesource/gerrit/plugins/healthcheck/AbstractHealthCheckIntegrationTest.java
index 56f96fe..1b1cd70 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/healthcheck/AbstractHealthCheckIntegrationTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/healthcheck/AbstractHealthCheckIntegrationTest.java
@@ -16,13 +16,20 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.gerrit.acceptance.LightweightPluginDaemonTest;
 import com.google.gerrit.acceptance.RestResponse;
 import com.google.gerrit.extensions.annotations.PluginName;
+import com.google.gerrit.extensions.registration.DynamicSet;
+import com.google.gerrit.metrics.MetricMaker;
 import com.google.gson.Gson;
 import com.google.gson.JsonObject;
 import com.google.inject.AbstractModule;
+import com.google.inject.Inject;
 import com.google.inject.Key;
+import com.google.inject.Singleton;
+import com.googlesource.gerrit.plugins.healthcheck.check.AbstractHealthCheck;
+import com.googlesource.gerrit.plugins.healthcheck.check.HealthCheck;
 import com.googlesource.gerrit.plugins.healthcheck.check.HealthCheckNames;
 import java.io.IOException;
 import org.eclipse.jgit.errors.ConfigInvalidException;
@@ -34,6 +41,33 @@
     protected void configure() {
       install(new HealthCheckExtensionApiModule());
       install(new Module());
+      DynamicSet.bind(binder(), HealthCheck.class).to(FakeHealthCheck.class);
+    }
+  }
+
+  @Singleton
+  public static class FakeHealthCheck extends AbstractHealthCheck {
+    private long sleep = 0L;
+    private Result result = Result.PASSED;
+
+    @Inject
+    public FakeHealthCheck(
+        ListeningExecutorService executor, HealthCheckConfig config, MetricMaker metricMaker) {
+      super(executor, config, "fake-check", metricMaker);
+    }
+
+    public void setSleep(long sleep) {
+      this.sleep = sleep;
+    }
+
+    public void setResult(Result result) {
+      this.result = result;
+    }
+
+    @Override
+    protected Result doCheck() throws Exception {
+      Thread.sleep(sleep);
+      return result;
     }
   }
 
diff --git a/src/test/java/com/googlesource/gerrit/plugins/healthcheck/HealthCheckIT.java b/src/test/java/com/googlesource/gerrit/plugins/healthcheck/HealthCheckIT.java
index 86ce328..621ba17 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/healthcheck/HealthCheckIT.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/healthcheck/HealthCheckIT.java
@@ -26,9 +26,15 @@
 import com.google.gerrit.acceptance.Sandboxed;
 import com.google.gerrit.acceptance.TestPlugin;
 import com.google.gerrit.acceptance.config.GerritConfig;
+import com.google.gson.JsonElement;
 import com.google.gson.JsonObject;
+import com.googlesource.gerrit.plugins.healthcheck.check.HealthCheck;
 import java.io.File;
 import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 import javax.servlet.http.HttpServletResponse;
 import org.eclipse.jgit.errors.ConfigInvalidException;
 import org.junit.Before;
@@ -114,6 +120,25 @@
   }
 
   @Test
+  public void shouldNotRunChecksWhenOneFails() throws Exception {
+    FakeHealthCheck fakeHealthCheck = plugin.getSysInjector().getInstance(FakeHealthCheck.class);
+    fakeHealthCheck.setResult(HealthCheck.Result.FAILED);
+
+    RestResponse resp = getHealthCheckStatus();
+    resp.assertStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    JsonObject respJson = getResponseJson(resp);
+    Set<Map.Entry<String, JsonElement>> checkResults = respJson.entrySet();
+    List<String> results =
+        checkResults.stream()
+            .map(Map.Entry::getValue)
+            .filter(JsonElement::isJsonObject)
+            .map(j -> j.getAsJsonObject().get("result"))
+            .map(JsonElement::getAsString)
+            .collect(Collectors.toList());
+    assertThat(results).contains("not_run");
+  }
+
+  @Test
   public void shouldReturnAuthCheckAsDisabled() throws Exception {
     disableCheck(AUTH);
     RestResponse resp = getHealthCheckStatus();