Merge "Provide SSH command to analyze H2 caches" into stable-3.3
diff --git a/BUILD b/BUILD
index 2336593..1ffe0a7 100644
--- a/BUILD
+++ b/BUILD
@@ -12,6 +12,9 @@
     name = "cache-chroniclemap",
     srcs = glob(["src/main/java/**/*.java"]),
     resources = glob(["src/main/resources/**/*"]),
+    manifest_entries = [
+        "Gerrit-SshModule: com.googlesource.gerrit.modules.cache.chroniclemap.command.SSHCommandModule",
+    ],
     deps = [
         "@chronicle-map//jar",
         "@chronicle-core//jar",
@@ -23,6 +26,8 @@
         "@javapoet//jar",
         "@jna-platform//jar",
         "@dev-jna//jar",
+        "//lib:h2",
+        "//lib/commons:io",
     ],
 )
 
diff --git a/config.md b/config.md
index bffc481..ab189b5 100644
--- a/config.md
+++ b/config.md
@@ -226,4 +226,10 @@
 brand new persistent cache (i.e. delete the old one).
 
 More information on recovery can be found in the
-[Official documentation](https://github.com/OpenHFT/Chronicle-Map/blob/master/docs/CM_Tutorial.adoc#recovery)
\ No newline at end of file
+[Official documentation](https://github.com/OpenHFT/Chronicle-Map/blob/master/docs/CM_Tutorial.adoc#recovery)
+
+### Tuning
+
+This module provides tooling to help understand how configuration should be
+optimized for chronicle-map.
+More information in the [tuning](tuning.md) documentation.
diff --git a/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2Caches.java b/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2Caches.java
new file mode 100644
index 0000000..3dec1cd
--- /dev/null
+++ b/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2Caches.java
@@ -0,0 +1,164 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.googlesource.gerrit.modules.cache.chroniclemap.command;
+
+import com.google.common.flogger.FluentLogger;
+import com.google.gerrit.server.config.GerritServerConfig;
+import com.google.gerrit.server.config.SitePaths;
+import com.google.gerrit.sshd.SshCommand;
+import com.google.inject.Inject;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collections;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.commons.io.FilenameUtils;
+import org.eclipse.jgit.lib.Config;
+import org.h2.Driver;
+
+/**
+ * SSH command that reads the H2 cache files found under the configured cache directory and
+ * prints, in git config format, the number of entries and the average key and value sizes of
+ * every non-empty cache.
+ */
+public class AnalyzeH2Caches extends SshCommand {
+  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
+
+  /** Extra serialization bytes of TimedValue entries, added to the average value size. */
+  private static final int TIMED_VALUE_WRAPPER_OVERHEAD = Long.BYTES + Integer.BYTES;
+
+  private final String cacheDirectory;
+  private final SitePaths site;
+
+  @Inject
+  AnalyzeH2Caches(@GerritServerConfig Config cfg, SitePaths site) {
+    this.cacheDirectory = cfg.getString("cache", null, "directory");
+    this.site = site;
+  }
+
+  /**
+   * Queries every H2 cache file and prints the resulting configuration template to stdout.
+   *
+   * <p>Empty caches are reported with a warning line and left out of the template. The first
+   * cache that cannot be queried aborts the command.
+   */
+  @Override
+  protected void run() throws UnloggedFailure, Failure, Exception {
+    Set<Path> h2Files = getH2CacheFiles();
+    stdout.println("Extracting information from H2 caches...");
+
+    Config config = new Config();
+    for (Path h2 : h2Files) {
+      final String url = jdbcUrl(h2);
+      // "<name>.h2.db" -> "<name>": getBaseName drops ".db", removeExtension drops ".h2".
+      final String baseName =
+          FilenameUtils.removeExtension(FilenameUtils.getBaseName(h2.toString()));
+      try (Connection conn = Driver.load().connect(url, null);
+          Statement s = conn.createStatement();
+          ResultSet r =
+              s.executeQuery(
+                  "SELECT COUNT(*), AVG(OCTET_LENGTH(k)), AVG(OCTET_LENGTH(v)) FROM data")) {
+        if (r.next()) {
+          long size = r.getLong(1);
+          long avgKeySize = r.getLong(2);
+          long avgValueSize = r.getLong(3);
+
+          if (size == 0) {
+            stdout.println(String.format("WARN: Cache %s is empty, skipping.", baseName));
+            continue;
+          }
+
+          config.setLong("cache", baseName, "entries", size);
+          config.setLong("cache", baseName, "avgKeySize", avgKeySize);
+          config.setLong(
+              "cache", baseName, "avgValueSize", avgValueSize + TIMED_VALUE_WRAPPER_OVERHEAD);
+        }
+      } catch (SQLException e) {
+        stderr.println(String.format("Could not get information from %s", baseName));
+        throw die(e);
+      }
+    }
+    stdout.println();
+    stdout.println("****************************");
+    stdout.println("** Chronicle-map template **");
+    stdout.println("****************************");
+    stdout.println();
+    stdout.println(config.toText());
+  }
+
+  /**
+   * Collects every path at or below the cache directory whose name ends with {@code h2.db}.
+   *
+   * @return the matching paths; empty when no cache directory is configured or when walking it
+   *     fails (the failure is logged).
+   * @throws UnloggedFailure if the configured cache directory is missing or unreadable.
+   */
+  private Set<Path> getH2CacheFiles() throws UnloggedFailure {
+    final Optional<Path> maybeCacheDir;
+    try {
+      maybeCacheDir = getCacheDir(site, cacheDirectory);
+    } catch (IOException e) {
+      throw die(e);
+    }
+    if (!maybeCacheDir.isPresent()) {
+      return Collections.emptySet();
+    }
+
+    // Files.walk keeps directory handles open until the stream is closed, so scope it.
+    try (Stream<Path> paths = Files.walk(maybeCacheDir.get())) {
+      return paths.filter(path -> path.toString().endsWith("h2.db")).collect(Collectors.toSet());
+    } catch (IOException e) {
+      logger.atSevere().withCause(e).log("Could not read H2 files");
+      return Collections.emptySet();
+    }
+  }
+
+  /** Builds the H2 JDBC URL for a cache file by dropping its two trailing extensions. */
+  private String jdbcUrl(Path h2FilePath) {
+    final String normalized =
+        FilenameUtils.removeExtension(FilenameUtils.removeExtension(h2FilePath.toString()));
+    return "jdbc:h2:" + normalized + ";AUTO_SERVER=TRUE";
+  }
+
+  /**
+   * Resolves the cache directory against the site path.
+   *
+   * @param site the site paths used to resolve {@code name}.
+   * @param name the configured cache directory, possibly {@code null}.
+   * @return the resolved directory, or empty when {@code name} is {@code null}.
+   * @throws IOException if the resolved directory does not exist or is not readable.
+   */
+  private static Optional<Path> getCacheDir(SitePaths site, String name) throws IOException {
+    if (name == null) {
+      return Optional.empty();
+    }
+    Path loc = site.resolve(name);
+    if (!Files.exists(loc)) {
+      throw new IOException(
+          String.format("disk cache is configured but doesn't exist: %s", loc.toAbsolutePath()));
+    }
+    if (!Files.isReadable(loc)) {
+      throw new IOException(String.format("Can't read from disk cache: %s", loc.toAbsolutePath()));
+    }
+    logger.atFine().log("Enabling disk cache %s", loc.toAbsolutePath());
+    return Optional.of(loc);
+  }
+}
diff --git a/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/SSHCommandModule.java b/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/SSHCommandModule.java
new file mode 100644
index 0000000..038c177
--- /dev/null
+++ b/src/main/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/SSHCommandModule.java
@@ -0,0 +1,24 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.googlesource.gerrit.modules.cache.chroniclemap.command;
+
+import com.google.gerrit.sshd.PluginCommandModule;
+
+/** Binds the {@code analyze-h2-caches} SSH command to {@link AnalyzeH2Caches}. */
+public class SSHCommandModule extends PluginCommandModule {
+  @Override
+  protected void configureCommands() {
+    command("analyze-h2-caches").to(AnalyzeH2Caches.class);
+  }
+}
diff --git a/src/test/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2CachesIT.java b/src/test/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2CachesIT.java
new file mode 100644
index 0000000..25327e0
--- /dev/null
+++ b/src/test/java/com/googlesource/gerrit/modules/cache/chroniclemap/command/AnalyzeH2CachesIT.java
@@ -0,0 +1,105 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.googlesource.gerrit.modules.cache.chroniclemap.command;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import com.google.gerrit.acceptance.LightweightPluginDaemonTest;
+import com.google.gerrit.acceptance.TestPlugin;
+import com.google.gerrit.acceptance.UseLocalDisk;
+import com.google.gerrit.acceptance.UseSsh;
+import com.google.gerrit.server.config.SitePaths;
+import com.google.inject.Inject;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import org.junit.Test;
+
+/** Integration tests for the {@code analyze-h2-caches} SSH command. */
+@UseSsh
+@TestPlugin(
+    name = "cache-chroniclemap",
+    sshModule = "com.googlesource.gerrit.modules.cache.chroniclemap.command.SSHCommandModule")
+public class AnalyzeH2CachesIT extends LightweightPluginDaemonTest {
+
+  /** SSH invocation of the command under test: plugin name followed by the command name. */
+  private static final String CMD =
+      Joiner.on(" ").join("cache-chroniclemap", "analyze-h2-caches");
+
+  /** Warning lines the command is expected to print when no change has been created. */
+  private static final List<String> EMPTY_CACHE_WARNINGS =
+      ImmutableList.of(
+          "WARN: Cache diff_intraline is empty, skipping.",
+          "WARN: Cache change_kind is empty, skipping.",
+          "WARN: Cache diff_summary is empty, skipping.",
+          "WARN: Cache diff is empty, skipping.",
+          "WARN: Cache mergeability is empty, skipping.",
+          "WARN: Cache pure_revert is empty, skipping.",
+          "WARN: Cache git_tags is empty, skipping.");
+
+  @Inject private SitePaths sitePaths;
+
+  @Test
+  @UseLocalDisk
+  public void shouldAnalyzeH2Cache() throws Exception {
+    createChange();
+
+    String result = adminSshSession.exec(CMD);
+
+    adminSshSession.assertSuccess();
+    assertThat(result).contains("[cache \"mergeability\"]\n" + "\tentries = 1\n");
+    assertThat(result).contains("[cache \"diff\"]\n" + "\tentries = 1\n");
+    assertThat(result).contains("[cache \"accounts\"]\n" + "\tentries = 4\n");
+    assertThat(result).contains("[cache \"diff_summary\"]\n" + "\tentries = 1\n");
+    assertThat(result).contains("[cache \"persisted_projects\"]\n" + "\tentries = 3\n");
+  }
+
+  @Test
+  @UseLocalDisk
+  public void shouldProduceWarningWhenCacheFileIsEmpty() throws Exception {
+    String result = adminSshSession.exec(CMD);
+
+    adminSshSession.assertSuccess();
+    assertThat(ImmutableList.copyOf(result.split("\n")))
+        .containsAtLeastElementsIn(EMPTY_CACHE_WARNINGS);
+  }
+
+  @Test
+  @UseLocalDisk
+  public void shouldIgnoreNonH2Files() throws Exception {
+    Path cacheDirectory = sitePaths.resolve(cfg.getString("cache", null, "directory"));
+    // Explicit charset so the fixture bytes do not depend on the platform default.
+    Files.write(
+        cacheDirectory.resolve("some.dat"), "some_content".getBytes(StandardCharsets.UTF_8));
+
+    String result = adminSshSession.exec(CMD);
+
+    adminSshSession.assertSuccess();
+    assertThat(ImmutableList.copyOf(result.split("\n")))
+        .containsAtLeastElementsIn(EMPTY_CACHE_WARNINGS);
+  }
+
+  @Test
+  @UseLocalDisk
+  public void shouldFailWhenCacheDirectoryDoesNotExists() throws Exception {
+    cfg.setString("cache", null, "directory", "/tmp/non_existing_directory");
+
+    adminSshSession.exec(CMD);
+    adminSshSession.assertFailure(
+        "fatal: disk cache is configured but doesn't exist: /tmp/non_existing_directory");
+  }
+}
diff --git a/tuning.md b/tuning.md
new file mode 100644
index 0000000..2f72668
--- /dev/null
+++ b/tuning.md
@@ -0,0 +1,104 @@
+# Tuning
+
+Tuning chronicle-map correctly might be a daunting task:
+How many entries does a particular cache instance need?
+What is the average key and value size for it?
+
+Rather than leaving you only with the trial and error (or the guesswork)
+approach, this module provides a utility to help you get started in the right
+direction.
+
+Since chronicle-map is one of the first open-source alternatives to the H2
+implementation, it is very likely that your Gerrit instance has been running
+with the default H2 cache backend.
+
+The idea is to read from the _actual_ H2 persisted files and output the
+information that will be required to configure chronicle-map as an alternative.
+
+You can do this _before_ installing cache-chroniclemap as a lib module, so that
+your Gerrit server will not need downtime, as follows:
+
+* Drop `cache-chroniclemap.jar` file in the `plugins/` directory.
+* Wait for the pluginLoader to acknowledge and load the new plugin. You will
+see an entry in the `error_log`:
+
+```
+INFO  com.google.gerrit.server.plugins.PluginLoader : Loaded plugin cache-chroniclemap
+```
+
+* You can now run an analysis on the current status of your H2 caches
+
+```bash
+ssh -p 29418 admin@<gerrit-server> cache-chroniclemap analyze-h2-caches
+```
+
+The result is printed on standard output in git config format.
+This is an example (the values are made up):
+
+```
+****************************
+** Chronicle-map template **
+****************************
+
+[cache "diff_summary"]
+	entries = 101
+	avgKeySize = 192
+	avgValueSize = 1350
+[cache "web_sessions"]
+	entries = 1
+	avgKeySize = 68
+	avgValueSize = 332
+[cache "pure_revert"]
+	entries = 1
+	avgKeySize = 112
+	avgValueSize = 8
+[cache "mergeability"]
+	entries = 101
+	avgKeySize = 150
+	avgValueSize = 8
+[cache "diff"]
+	entries = 101
+	avgKeySize = 188
+	avgValueSize = 5035
+[cache "persisted_projects"]
+	entries = 2
+	avgKeySize = 88
+	avgValueSize = 4489
+[cache "accounts"]
+	entries = 5
+	avgKeySize = 52
+	avgValueSize = 505
+```
+
+Empty caches (if any) will not generate empty config stanzas, rather a warning
+will be displayed on standard output.
+
+For example:
+```
+WARN: Cache diff_intraline is empty, skipping.
+```
+
+Please note that the generated configuration is not necessarily final and it
+might still need adjustments:
+* Since chronicle-map file size is pre-allocated, you might want to allow for
+more entries.
+* You might want to account for uncertainty by specifying a `maxBloatFactor`
+greater than 1.
+* any other reason.
+
+Once you have gathered the information you wanted, you might consider removing
+the plugin:
+
+* Remove the jar from the `plugins` directory
+
+```bash
+rm plugins/cache-chroniclemap.jar
+```
+
+* Wait for the pluginLoader to acknowledge and unload the plugin. You will
+see an entry in the `error_log`:
+
+```
+INFO  com.google.gerrit.server.plugins.PluginLoader : Unloading plugin cache-chroniclemap
+```
+