Do not reindex Lucene indices on every start of the gerrit pod

The Gerrit pod installed by the primary gerrit chart reindexed all
Gerrit indices on each startup. This can be expensive if the indices
contain a large amount of data.

This change moves the reindexing to the gerrit-init container.
A Python script was added that runs the reindexing if the indices are
not ready. Indices are considered not ready in the following cases:

- The $gerrit_site/index/gerrit_index.config does not exist
- At least one of the indices is marked as not ready in the
  gerrit_index.config file.
- The index versions are different between the Lucene indices and the
  gerrit_index.config

Furthermore, if the chart is configured not to persist the Gerrit site,
reindexing is enforced.

Change-Id: I65e74f6b2d55788737fa5b67b61b1d25e6ebb925
diff --git a/container-images/gerrit-init/tools/gerrit_reindex.py b/container-images/gerrit-init/tools/gerrit_reindex.py
new file mode 100755
index 0000000..a201ca6
--- /dev/null
+++ b/container-images/gerrit-init/tools/gerrit_reindex.py
@@ -0,0 +1,150 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os.path
+import subprocess
+import sys
+
+from git_config_parser import GitConfigParser
+from log import get_logger
+
+LOG = get_logger("reindex")
+
+
+class GerritReindexer:
+    def __init__(self, gerrit_site_path):
+        """Reads the index type and the configured indices of the given Gerrit site."""
+        self.gerrit_site_path = gerrit_site_path
+        self.index_config_path = "{}/index/gerrit_index.config".format(gerrit_site_path)
+        self.index_type = self._get_index_type()
+        self.configured_indices = self._parse_gerrit_index_config()
+
+    def _get_index_type(self):
+        """Returns the lower-cased `index.type` of gerrit.config (default: lucene)."""
+        config_path = os.path.join(self.gerrit_site_path, "etc", "gerrit.config")
+        index_type = GitConfigParser(config_path).get("index.type", "lucene")
+        return index_type.lower()
+
+    def _parse_gerrit_index_config(self):
+        """Returns version and ready-state of each index in gerrit_index.config."""
+        if not os.path.exists(self.index_config_path):
+            return {}
+        parsed = {}
+        for option in GitConfigParser(self.index_config_path).list():
+            # The subsection is expected to look like `<index name>_<version>`.
+            index_name, raw_version = option["subsection"].rsplit("_", 1)
+            state = {"version": int(raw_version)}
+            state["ready"] = option["value"].lower() == "true"
+            parsed[index_name] = state
+        return parsed
+
+    def _get_unready_indices(self):
+        """Returns the names of all configured indices not flagged as ready."""
+        configured = self.configured_indices
+        not_ready = [name for name in configured if not configured[name]["ready"]]
+        for name in not_ready:
+            LOG.info("Index %s not ready.", name)
+        return not_ready
+
+    def _get_lucene_indices(self):
+        """Returns {name: version} of all `<name>_<version>` entries in the index dir."""
+        lucene_indices = dict()
+        for entry in os.listdir(os.path.join(self.gerrit_site_path, "index")):
+            try:
+                # gerrit_index.config has no numeric suffix and is ignored as well.
+                (name, version) = entry.rsplit("_", 1)
+                lucene_indices[name] = int(version)
+            except ValueError:
+                LOG.debug("Ignoring invalid file in index-directory: %s", entry)
+        return lucene_indices
+
+    def _check_lucene_index_versions(self):
+        """Returns True, if each configured index exists in the configured version."""
+        lucene_indices = self._get_lucene_indices()
+        # Compare by value, not identity. A missing index counts as outdated.
+        return bool(lucene_indices) and all(
+            index_attrs["version"] == lucene_indices.get(index)
+            for index, index_attrs in self.configured_indices.items()
+        )
+
+    def reindex(self, indices=None):
+        """Reindexes the given indices (default: all). Exits with code 1 on failure."""
+        LOG.info("Starting to reindex.")
+        # An argument list keeps site paths containing whitespace intact.
+        command = ["java", "-jar", "/var/war/gerrit.war", "reindex"]
+        command += ["-d", self.gerrit_site_path]
+        for index in indices or []:
+            command += ["--index", index]
+        reindex_process = subprocess.run(command, stdout=subprocess.PIPE)
+        # A negative return code (killed by a signal) is a failure as well.
+        if reindex_process.returncode != 0:
+            LOG.error(
+                "An error occured, when reindexing Gerrit indices. Exit code: %d",
+                reindex_process.returncode,
+            )
+            sys.exit(1)
+        LOG.info("Finished reindexing.")
+
+    def start(self, is_forced):
+        """
+        Triggers the required reindexing. If `is_forced`, all indices are reindexed.
+        """
+        if is_forced:
+            self.reindex()
+            return
+        if not self.configured_indices:
+            LOG.info("gerrit_index.config does not exist. Creating all indices.")
+            self.reindex()
+            return
+
+        indices_to_fix = self._get_unready_indices()
+        if indices_to_fix:
+            self.reindex(indices_to_fix)
+
+        # Versions are only checked for Lucene; other index types are always reindexed.
+        if self.index_type != "lucene":
+            self.reindex()
+        elif not self._check_lucene_index_versions():
+            LOG.info("Not all indices are up-to-date.")
+            self.reindex()
+        else:
+            LOG.info("Skipping reindexing.")
+
+
+# pylint: disable=C0103
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-s",
+        "--site",
+        help="Path to Gerrit site",
+        dest="site",
+        action="store",
+        default="/var/gerrit",
+        # Not `required`: otherwise the default above could never take effect.
+    )
+    parser.add_argument(
+        "-f",
+        "--force",
+        help="Reindex even if indices are ready.",
+        dest="force",
+        action="store_true",
+    )
+    args = parser.parse_args()
+
+    reindexer = GerritReindexer(args.site)
+    reindexer.start(args.force)
diff --git a/container-images/gerrit-init/tools/git_config_parser.py b/container-images/gerrit-init/tools/git_config_parser.py
index fa5d609..c5cea95 100644
--- a/container-images/gerrit-init/tools/git_config_parser.py
+++ b/container-images/gerrit-init/tools/git_config_parser.py
@@ -29,11 +29,31 @@
         command = "git config -f %s --get %s" % (self.path, key)
         return self._execute_shell_command_and_get_output_lines(command)
 
+    def list(self):
+        """
+        Returns all options of the configuration file as a list of dicts with
+        the keys `section`, `subsection` (None, if not set), `key` and `value`.
+        """
+        command = "git config -f %s --list" % (self.path)
+        options = self._execute_shell_command_and_get_output_lines(command)
+        option_list = []
+        for opt in options:
+            # Tolerate lines without "=" instead of failing on unpacking.
+            full_key, _, value = opt.partition("=")
+            # Only the subsection may contain dots, so split from both ends.
+            section, _, remainder = full_key.partition(".")
+            subsection, _, key = remainder.rpartition(".")
+            parsed_opt = {"section": section, "subsection": subsection or None}
+            parsed_opt.update(key=key, value=value)
+            option_list.append(parsed_opt)
+
+        return option_list
+
     def get(self, key, default=None):
         """
-    Returns value of given key in the configuration file. If the key appears
-    multiple times, the last value is returned.
-    """
+        Returns value of given key in the configuration file. If the key appears
+        multiple times, the last value is returned.
+        """
         try:
             return self._get_value(key)[-1]
         except subprocess.CalledProcessError:
diff --git a/container-images/gerrit/tools/start b/container-images/gerrit/tools/start
index 05b76ba..812e036 100755
--- a/container-images/gerrit/tools/start
+++ b/container-images/gerrit/tools/start
@@ -12,9 +12,6 @@
 # from secrets/configmaps in Kubernetes make the containing directory read-only.
 symlink_config_to_site
 
-java -jar /var/gerrit/bin/gerrit.war reindex \
-    -d /var/gerrit
-
 JAVA_OPTIONS=$(git config --file /var/gerrit/etc/gerrit.config --get-all container.javaOptions)
 java ${JAVA_OPTIONS} -jar /var/gerrit/bin/gerrit.war daemon \
   -d /var/gerrit \
diff --git a/helm-charts/gerrit/templates/gerrit.stateful-set.yaml b/helm-charts/gerrit/templates/gerrit.stateful-set.yaml
index c3c04a6..35ebe63 100644
--- a/helm-charts/gerrit/templates/gerrit.stateful-set.yaml
+++ b/helm-charts/gerrit/templates/gerrit.stateful-set.yaml
@@ -76,6 +76,13 @@
             -s /var/gerrit
 
           symlink_config_to_site
+
+          {{ if not .Values.gerrit.persistence.enabled -}}
+            FLAGS="$FLAGS --force"
+          {{- end }}
+
+          /var/tools/gerrit_reindex.py $FLAGS \
+            -s /var/gerrit
         volumeMounts:
         - name: gerrit-site
           mountPath: "/var/gerrit"
diff --git a/tests/container-images/gerrit-init/test_container_integration_gerrit_init_reindexing.py b/tests/container-images/gerrit-init/test_container_integration_gerrit_init_reindexing.py
new file mode 100644
index 0000000..749a328
--- /dev/null
+++ b/tests/container-images/gerrit-init/test_container_integration_gerrit_init_reindexing.py
@@ -0,0 +1,137 @@
+# pylint: disable=E1101
+
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+
+@pytest.fixture(scope="function")
+def temp_site(tmp_path_factory):  # temporary directory, mounted as Gerrit site
+    return tmp_path_factory.mktemp("gerrit-index-test")
+
+
+@pytest.fixture(scope="function")
+def container_run_endless(request, docker_client, gerrit_init_image, temp_site):
+    container_run = docker_client.containers.run(
+        image=gerrit_init_image.id,
+        entrypoint="/bin/ash",
+        command=["-c", "tail -f /dev/null"],  # blocks forever to keep it running
+        volumes={str(temp_site): {"bind": "/var/gerrit", "mode": "rw"}},
+        user="gerrit",
+        detach=True,
+        auto_remove=True,
+    )
+    # Registered as finalizer below, so the container gets stopped on teardown.
+    def stop_container():
+        container_run.stop(timeout=1)
+
+    request.addfinalizer(stop_container)
+
+    return container_run
+
+
+@pytest.mark.incremental
+class TestGerritReindex:
+    def _get_indices(self, container):
+        """Returns the subsections (`<name>_<version>`) of gerrit_index.config."""
+        _, output = container.exec_run(
+            "git config -f /var/gerrit/index/gerrit_index.config "
+            + "--name-only "
+            + "--get-regexp index"
+        )
+        return [key.split(".")[1] for key in output.decode().strip().splitlines()]
+
+    def test_gerrit_init_skips_reindexing_on_fresh_site(
+        self, temp_site, container_run_endless
+    ):
+        assert not os.path.exists(
+            os.path.join(temp_site, "index", "gerrit_index.config")
+        )
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_init.py -s /var/gerrit -c /var/config/default.config.yaml"
+        )
+        assert exit_code == 0
+        expected_files = ["gerrit_index.config"] + self._get_indices(
+            container_run_endless
+        )
+        for expected_file in expected_files:
+            assert os.path.exists(os.path.join(temp_site, "index", expected_file))
+        # Remember the ctime of the index directory to detect later changes to it.
+        timestamp_index_dir = os.path.getctime(os.path.join(temp_site, "index"))
+        # Run the reindex script on the site that was just initialized above.
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_reindex.py -s /var/gerrit"
+        )
+        assert exit_code == 0
+        assert timestamp_index_dir == os.path.getctime(os.path.join(temp_site, "index"))
+
+    def test_gerrit_init_fixes_missing_index_config(
+        self, container_run_endless, temp_site
+    ):
+        container_run_endless.exec_run(
+            "/var/tools/gerrit_init.py -s /var/gerrit -c /var/config/default.config.yaml"
+        )
+        os.remove(os.path.join(temp_site, "index", "gerrit_index.config"))
+        # The index config was deleted above; the reindex script has to cope with that.
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_reindex.py -s /var/gerrit"
+        )
+        assert exit_code == 0
+        # Gerrit has to start successfully afterwards.
+        exit_code, _ = container_run_endless.exec_run("/var/gerrit/bin/gerrit.sh start")
+        assert exit_code == 0
+
+    def test_gerrit_init_fixes_unready_indices(self, container_run_endless):
+        container_run_endless.exec_run(
+            "/var/tools/gerrit_init.py -s /var/gerrit -c /var/config/default.config.yaml"
+        )
+        indices = self._get_indices(container_run_endless)
+        assert indices
+        # Mark the first index as not ready; git requires the fully qualified key.
+        exit_code, _ = container_run_endless.exec_run(
+            "git config -f /var/gerrit/index/gerrit_index.config index.%s.ready false"
+            % indices[0]
+        )
+        assert exit_code == 0
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_reindex.py -s /var/gerrit"
+        )
+        assert exit_code == 0
+        exit_code, _ = container_run_endless.exec_run("/var/gerrit/bin/gerrit.sh start")
+        assert exit_code == 0
+
+    def test_gerrit_init_fixes_outdated_indices(self, container_run_endless, temp_site):
+        container_run_endless.exec_run(
+            "/var/tools/gerrit_init.py -s /var/gerrit -c /var/config/default.config.yaml"
+        )
+        # Rename the directory of the first index to the previous version number.
+        index = self._get_indices(container_run_endless)[0]
+        (name, version) = index.split("_")
+        os.rename(
+            os.path.join(temp_site, "index", index),
+            os.path.join(
+                temp_site, "index", "{name}_{0:04d}".format(int(version) - 1, name=name)
+            ),
+        )
+        # Run the reindex script on the site with the renamed index directory.
+        exit_code, _ = container_run_endless.exec_run(
+            "/var/tools/gerrit_reindex.py -s /var/gerrit"
+        )
+        assert exit_code == 0
+        # Gerrit has to start successfully afterwards.
+        exit_code, _ = container_run_endless.exec_run("/var/gerrit/bin/gerrit.sh start")
+        assert exit_code == 0
diff --git a/tests/container-images/gerrit-init/test_container_structure_gerrit_init.py b/tests/container-images/gerrit-init/test_container_structure_gerrit_init.py
index ab94fc8..1098c82 100755
--- a/tests/container-images/gerrit-init/test_container_structure_gerrit_init.py
+++ b/tests/container-images/gerrit-init/test_container_structure_gerrit_init.py
@@ -29,6 +29,7 @@
     params=[
         "/var/tools/download_plugins.py",
         "/var/tools/gerrit_init.py",
+        "/var/tools/gerrit_reindex.py",
         "/var/tools/git_config_parser.py",
         "/var/tools/init_config.py",
     ],