Use healthcheck plugin for readiness/liveness-probes for Gerrit replica

The Gerrit replica pod did not yet have readiness or liveness probes.
Since the Gerrit replica does not expose the REST API, this cannot be
achieved with Gerrit core. Thus, the healthcheck plugin is used
instead.

To ensure that this works, the healthcheck plugin is built into the
Gerrit container image and will always be installed. However, it can be
overridden by configuring a packaged or downloaded version in the helm
chart.

In addition, a default configuration is provided to make the
healthcheck work on new Gerrit servers without existing data. To
do that, the querychanges and auth healthchecks are disabled by
default. This config can be overridden in the helm chart.

Change-Id: I15db8e86286404c7ba1f1660d4b82f0e80d9d6bc
diff --git a/container-images/gerrit-base/Dockerfile b/container-images/gerrit-base/Dockerfile
index 59766c6..de4c378 100644
--- a/container-images/gerrit-base/Dockerfile
+++ b/container-images/gerrit-base/Dockerfile
@@ -8,6 +8,8 @@
 
 RUN mkdir -p /var/gerrit/bin && \
     mkdir -p /var/gerrit/etc && \
+    mkdir -p /var/gerrit/plugins && \
+    mkdir -p /var/plugins && \
     mkdir -p /var/war
 
 # Download Gerrit release
@@ -15,10 +17,16 @@
 RUN curl -k -o /var/war/gerrit.war ${GERRIT_WAR_URL} && \
     ln -s /var/war/gerrit.war /var/gerrit/bin/gerrit.war
 
+# Download healthcheck plugin
+RUN curl -k -o /var/plugins/healthcheck.jar \
+        https://gerrit-ci.gerritforge.com/job/plugin-healthcheck-bazel-stable-3.1/lastSuccessfulBuild/artifact/bazel-bin/plugins/healthcheck/healthcheck.jar && \
+    ln -s /var/plugins/healthcheck.jar /var/gerrit/plugins/healthcheck.jar
+
 # Allow incoming traffic
 EXPOSE 29418 8080
 
 RUN chown -R gerrit:users /var/gerrit && \
+    chown -R gerrit:users /var/plugins && \
     chown -R gerrit:users /var/war
 USER gerrit
 
diff --git a/container-images/gerrit-init/tools/download_plugins.py b/container-images/gerrit-init/tools/download_plugins.py
index 3615b93..e9fcc6c 100755
--- a/container-images/gerrit-init/tools/download_plugins.py
+++ b/container-images/gerrit-init/tools/download_plugins.py
@@ -17,6 +17,7 @@
 import argparse
 import hashlib
 import os
+import shutil
 import time
 
 from abc import ABC, abstractmethod
@@ -40,6 +41,8 @@
         self.site = site
         self.config = config
 
+        self.required_plugins = self._get_required_plugins()
+
         self.plugin_dir = os.path.join(site, "plugins")
         self.plugins_changed = False
 
@@ -54,6 +57,31 @@
 
         return list()
 
+    def _get_required_plugins(self):
+        required = [
+            os.path.splitext(f)[0]
+            for f in os.listdir("/var/plugins")
+            if f.endswith(".jar")
+        ]
+        return list(
+            filter(
+                lambda x: x not in self.config.get_all_configured_plugins(), required
+            )
+        )
+
+    def _install_plugins_from_container(self):
+        source_dir = "/var/plugins"
+        for plugin in self.required_plugins:
+            source_file = os.path.join(source_dir, plugin + ".jar")
+            target_file = os.path.join(self.plugin_dir, plugin + ".jar")
+            if os.path.exists(target_file) and self._get_file_sha(
+                source_file
+            ) == self._get_file_sha(target_file):
+                continue
+
+            shutil.copyfile(source_file, target_file)
+            self.plugins_changed = True
+
     @staticmethod
     def _get_file_sha(file):
         file_hash = hashlib.sha1()
@@ -69,8 +97,8 @@
         return file_hash.hexdigest()
 
     def _remove_unwanted_plugins(self):
-        wanted_plugins = [plugin["name"] for plugin in self.config.downloaded_plugins]
-        wanted_plugins.extend(self.config.packaged_plugins)
+        wanted_plugins = list(self.config.get_all_configured_plugins())
+        wanted_plugins.extend(self.required_plugins)
         for plugin in self._get_installed_plugins():
             if os.path.splitext(plugin)[0] not in wanted_plugins:
                 os.remove(os.path.join(self.plugin_dir, plugin))
@@ -79,6 +107,7 @@
     def execute(self):
         self._create_plugins_dir()
         self._remove_unwanted_plugins()
+        self._install_plugins_from_container()
 
         for plugin in self.config.downloaded_plugins:
             self._install_plugin(plugin)
diff --git a/container-images/gerrit-init/tools/init_config.py b/container-images/gerrit-init/tools/init_config.py
index 266fdde..f20c71b 100644
--- a/container-images/gerrit-init/tools/init_config.py
+++ b/container-images/gerrit-init/tools/init_config.py
@@ -44,3 +44,8 @@
             self.plugin_cache_dir = config["pluginCacheDir"]
 
         return self
+
+    def get_all_configured_plugins(self):
+        plugins = set(self.packaged_plugins)
+        plugins.update([p["name"] for p in self.downloaded_plugins])
+        return plugins
diff --git a/helm-charts/gerrit-replica/README.md b/helm-charts/gerrit-replica/README.md
index c6939b2..cf25ec4 100644
--- a/helm-charts/gerrit-replica/README.md
+++ b/helm-charts/gerrit-replica/README.md
@@ -261,6 +261,8 @@
 | `gerritReplica.replicas`                      | Number of pod replicas to deploy                                                                    | `1`                                                                             |
 | `gerritReplica.maxSurge`                      | Max. percentage or number of pods allowed to be scheduled above the desired number                  | `25%`                                                                           |
 | `gerritReplica.maxUnavailable`                | Max. percentage or number of pods allowed to be unavailable at a time                               | `100%`                                                                          |
+| `gerritReplica.livenessProbe`                 | Configuration of the liveness probe timings                                                         | `{initialDelaySeconds: 60, periodSeconds: 5}`                                   |
+| `gerritReplica.readinessProbe`                | Configuration of the readiness probe timings                                                        | `{initialDelaySeconds: 10, periodSeconds: 10}`                                  |
 | `gerritReplica.resources`                     | Configure the amount of resources the pod requests/is allowed                                       | `requests.cpu: 1`                                                               |
 |                                               |                                                                                                     | `requests.memory: 5Gi`                                                          |
 |                                               |                                                                                                     | `limits.cpu: 1`                                                                 |
@@ -342,6 +344,16 @@
     memory resource limit set for the container (e.g. `-Xmx4g`). In your calculation
     allow memory for other components running in the container.
 
+To enable liveness and readiness probes, the healthcheck plugin is installed
+by default. Note that if a packaged or downloaded version of the healthcheck
+plugin is configured, the configured version takes precedence over the default
+version. By default, the plugin is configured to disable the `querychanges` and
+`auth` healthchecks, since the Gerrit replica does not index changes and a new
+Gerrit server will not necessarily have a user yet to validate authentication.
+
+The default configuration can be overridden by adding a `healthcheck.config`
+file as a key-value pair to `gerritReplica.etc.config`, as with any other configuration file.
+
 ## Upgrading the Chart
 
 To upgrade an existing installation of the gerrit-replica chart, e.g. to install
diff --git a/helm-charts/gerrit-replica/templates/gerrit-replica.configmap.yaml b/helm-charts/gerrit-replica/templates/gerrit-replica.configmap.yaml
index 5e98f01..10962c1 100644
--- a/helm-charts/gerrit-replica/templates/gerrit-replica.configmap.yaml
+++ b/helm-charts/gerrit-replica/templates/gerrit-replica.configmap.yaml
@@ -12,6 +12,15 @@
   {{ $key }}:
 {{ toYaml $value | indent 4 }}
   {{- end }}
+  {{- if not (hasKey .Values.gerritReplica.etc.config "healthcheck.config") }}
+  healthcheck.config: |-
+    [healthcheck "auth"]
+      # On new instances there may be no users to use for healthchecks
+      enabled = false
+    [healthcheck "querychanges"]
+      # On new instances there won't be any changes to query
+      enabled = false
+  {{- end }}
 ---
 apiVersion: v1
 kind: ConfigMap
diff --git a/helm-charts/gerrit-replica/templates/gerrit-replica.deployment.yaml b/helm-charts/gerrit-replica/templates/gerrit-replica.deployment.yaml
index 6aff31d..4e208bf 100644
--- a/helm-charts/gerrit-replica/templates/gerrit-replica.deployment.yaml
+++ b/helm-charts/gerrit-replica/templates/gerrit-replica.deployment.yaml
@@ -126,9 +126,11 @@
         image: {{ template "registry" . }}{{ .Values.gerritReplica.images.gerritReplica }}:{{ .Values.images.version }}
         imagePullPolicy: {{ .Values.images.imagePullPolicy }}
         ports:
-        - containerPort: 8080
+        - name: http
+          containerPort: 8080
         {{ if .Values.gerritReplica.service.ssh -}}
-        - containerPort: 29418
+        - name: ssh
+          containerPort: 29418
         {{- end }}
         volumeMounts:
         - name: gerrit-site
@@ -148,6 +150,16 @@
           mountPath: "/var/mnt/etc/config"
         - name: gerrit-replica-secure-config
           mountPath: "/var/mnt/etc/secret"
+        livenessProbe:
+          httpGet:
+            path: /config/server/healthcheck~status
+            port: http
+{{ toYaml .Values.gerritReplica.livenessProbe | indent 10 }}
+        readinessProbe:
+          httpGet:
+            path: /config/server/healthcheck~status
+            port: http
+{{ toYaml .Values.gerritReplica.readinessProbe | indent 10 }}
         resources:
 {{ toYaml .Values.gerritReplica.resources | indent 10 }}
       {{ if .Values.promtailSidecar.enabled -}}
diff --git a/helm-charts/gerrit-replica/values.yaml b/helm-charts/gerrit-replica/values.yaml
index be5094d..879a894 100644
--- a/helm-charts/gerrit-replica/values.yaml
+++ b/helm-charts/gerrit-replica/values.yaml
@@ -204,6 +204,14 @@
   # work.
   maxUnavailable: 100%
 
+  livenessProbe:
+    initialDelaySeconds: 60
+    periodSeconds: 5
+
+  readinessProbe:
+    initialDelaySeconds: 10
+    periodSeconds: 10
+
   # The memory limit has to be higher than the configures heap-size for Java!
   resources:
     requests:
diff --git a/tests/conftest.py b/tests/conftest.py
index c429b33..ed8c041 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -357,3 +357,8 @@
     if request.config.getoption("--push"):
         docker_push("gerrit-init")
     return gerrit_init_image
+
+
+@pytest.fixture(scope="session")
+def required_plugins(request):
+    return ["healthcheck"]
diff --git a/tests/container-images/gerrit-base/test_container_structure_gerrit_base.py b/tests/container-images/gerrit-base/test_container_structure_gerrit_base.py
index c20144e..52a2f35 100755
--- a/tests/container-images/gerrit-base/test_container_structure_gerrit_base.py
+++ b/tests/container-images/gerrit-base/test_container_structure_gerrit_base.py
@@ -79,6 +79,14 @@
 
 @pytest.mark.docker
 @pytest.mark.structure
+def test_gerrit_base_contains_required_plugins(container_run, required_plugins):
+    for plugin in required_plugins:
+        exit_code, _ = container_run.exec_run("test -f /var/plugins/%s.jar" % plugin)
+        assert exit_code == 0
+
+
+@pytest.mark.docker
+@pytest.mark.structure
 def test_gerrit_base_site_permissions(container_run):
     exit_code, _ = container_run.exec_run("test -O /var/gerrit")
     assert exit_code == 0
diff --git a/tests/container-images/gerrit-init/test_container_integration_gerrit_init.py b/tests/container-images/gerrit-init/test_container_integration_gerrit_init.py
index 082ffa7..287dac7 100644
--- a/tests/container-images/gerrit-init/test_container_integration_gerrit_init.py
+++ b/tests/container-images/gerrit-init/test_container_integration_gerrit_init.py
@@ -126,7 +126,12 @@
             yaml.dump({"packagedPlugins": plugins}, f, default_flow_style=False)
 
     def test_gerrit_init_plugins_are_installed(
-        self, container_run_endless, init_config_dir, plugins_to_install, tmp_site_dir
+        self,
+        container_run_endless,
+        init_config_dir,
+        plugins_to_install,
+        tmp_site_dir,
+        required_plugins,
     ):
         self._configure_packaged_plugins(
             os.path.join(init_config_dir, "init.yaml"), plugins_to_install
@@ -143,5 +148,23 @@
             assert os.path.exists(os.path.join(plugins_path, "%s.jar" % plugin))
 
         installed_plugins = os.listdir(plugins_path)
+        expected_plugins = plugins_to_install + required_plugins
         for plugin in installed_plugins:
-            assert os.path.splitext(plugin)[0] in plugins_to_install
+            assert os.path.splitext(plugin)[0] in expected_plugins
+
+    def test_required_plugins_are_installed(
+        self, container_run_endless, init_config_dir, tmp_site_dir, required_plugins
+    ):
+        self._configure_packaged_plugins(
+            os.path.join(init_config_dir, "init.yaml"), ["hooks"]
+        )
+
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_init.py -s /var/gerrit -c /var/config/init.yaml"
+        )
+        assert exit_code == 0
+
+        for plugin in required_plugins:
+            assert os.path.exists(
+                os.path.join(tmp_site_dir, "plugins", "%s.jar" % plugin)
+            )