Always report healthy when instance has caught up with outstanding tasks
Once the instance has caught up with outstanding pull-replication tasks
on startup, we consider the node healthy and ready to receive write
traffic. As such, any subsequent invocations of the check should return
a healthy status, regardless of pending or inflight tasks being present.
Bug: Issue 312895374
Change-Id: I831dbb723a8a3af15eb12331e6073396534cdde8
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheck.java b/src/main/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheck.java
index a621778..778a6cc 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheck.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheck.java
@@ -46,6 +46,7 @@
private final SourcesCollection sourcesCollection;
private final Ticker ticker;
private Optional<Long> successfulSince = Optional.empty();
+ private boolean isCaughtUp;
@Inject
public PullReplicationTasksHealthCheck(
@@ -86,6 +87,9 @@
@Override
protected Result doCheck() throws Exception {
+ if (isCaughtUp) {
+ return Result.PASSED;
+ }
long checkTime = ticker.read();
List<Source> sources = sourcesCollection.getAll();
boolean hasNoOutstandingTasks =
@@ -108,9 +112,9 @@
}
private HealthCheck.Result reportResult(long checkTime) {
- return successfulSince
- .filter(ss -> checkTime >= ss + periodOfTimeNanos)
- .map(ss -> Result.PASSED)
- .orElse(Result.FAILED);
+ Optional<Result> maybePassed =
+ successfulSince.filter(ss -> checkTime >= ss + periodOfTimeNanos).map(ss -> Result.PASSED);
+ isCaughtUp = maybePassed.isPresent();
+ return maybePassed.orElse(Result.FAILED);
}
}
diff --git a/src/main/resources/Documentation/healthcheck.md b/src/main/resources/Documentation/healthcheck.md
index 3b84642..afe773b 100644
--- a/src/main/resources/Documentation/healthcheck.md
+++ b/src/main/resources/Documentation/healthcheck.md
@@ -2,8 +2,8 @@
==============
The @PLUGIN@ plugin registers the `pull-replication-outstanding-tasks`
-healthcheck. This check will mark a gerrit instance as healthy
-only when the node has caught up with all the outstanding
+healthcheck. This check will mark a gerrit instance as healthy upon
+startup only when the node has caught up with all the outstanding
pull-replication tasks. The goal is to mark the node as healthy when it
is ready to receive write traffic. "Caught up" means:
@@ -14,6 +14,10 @@
See [Healthcheck based on replication tasks](https://issues.gerritcodereview.com/issues/312895374) for more details.
+**It is worth noting that once the healthcheck eventually succeeds and
+the instance is marked healthy, the check is then skipped (ie any
+subsequent invocations will always mark the instance as healthy
+irrespective of any pending or inflight tasks being present).**
Health check configuration
--------------------------
diff --git a/src/test/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheckTest.java b/src/test/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheckTest.java
index 6dfbc03..06fa996 100644
--- a/src/test/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheckTest.java
+++ b/src/test/java/com/googlesource/gerrit/plugins/replication/pull/health/PullReplicationTasksHealthCheckTest.java
@@ -179,6 +179,26 @@
assertThat(check.run().result).isEqualTo(HealthCheck.Result.PASSED);
}
+ @Test
+ public void shouldAlwaysReportHealthyAfterItHasCaughtUpWithOutstandingTasks() {
+ int numCheckIterations = 2;
+ when(source.pendingTasksCount()).thenReturn(0L, 0L, 1L);
+ when(source.inflightTasksCount()).thenReturn(0L);
+
+ Injector injector = testInjector(new TestModule(List.of(), periodOfTimeMillisStr));
+ PullReplicationTasksHealthCheck check =
+ injector.getInstance(PullReplicationTasksHealthCheck.class);
+
+ assertThat(
+ runNTimes(
+ numCheckIterations,
+ check,
+ () -> fakeTicker.advance(Duration.ofMillis(periodOfTimeMillis))))
+ .containsExactly(HealthCheck.Result.FAILED, HealthCheck.Result.PASSED);
+
+ assertThat(check.run().result).isEqualTo(HealthCheck.Result.PASSED);
+ }
+
private Injector testInjector(AbstractModule testModule) {
return Guice.createInjector(new HealthCheckExtensionApiModule(), testModule);
}