Add the git file diff cache

The new git file diff cache computes the pure git diff for a single file
path according to a git tree diff. The output entity contains:
  1) List of edits (modified regions) of the file.
  2) Old and new file paths.
  3) Old and new blob object IDs for the file in git.
  4) Change type (file is added or deleted or modified, etc...).
  5) Patch type (whether the patch of the file is unified or binary).
  6) File mode.

The cache also returns an empty git FileDiff if the requested file
path does not exist among the set of modified files between the 2
commits identified by their tree IDs in the cache key.

Change-Id: Ia278881c0f5bd71a82ea6863653ae52eb1c3ad0b
diff --git a/java/com/google/gerrit/entities/Patch.java b/java/com/google/gerrit/entities/Patch.java
index e6b2167..856765b 100644
--- a/java/com/google/gerrit/entities/Patch.java
+++ b/java/com/google/gerrit/entities/Patch.java
@@ -160,5 +160,40 @@
     }
   }
 
+  /**
+   * File modes recognized by Git, each identified by a single-character code. This is the Gerrit
+   * entity for {@link org.eclipse.jgit.lib.FileMode}.
+   */
+  public enum FileMode implements CodedEnum {
+    /** The entry is a tree (aka directory). */
+    TREE('T'),
+
+    /** The entry is a symbolic link. */
+    SYMLINK('S'),
+
+    /** The entry is a non-executable file. */
+    REGULAR_FILE('R'),
+
+    /** The entry is an executable file. */
+    EXECUTABLE_FILE('E'),
+
+    /** The entry is a submodule commit in another repository. */
+    GITLINK('G'),
+
+    /** The entry is missing during parallel walks. */
+    MISSING('M');
+
+    private final char code;
+
+    FileMode(char code) {
+      this.code = code;
+    }
+
+    @Override
+    public char getCode() {
+      return code;
+    }
+  }
+
   private Patch() {}
 }
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/FileHeaderUtil.java b/java/com/google/gerrit/server/patch/gitfilediff/FileHeaderUtil.java
new file mode 100644
index 0000000..9827a69
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/FileHeaderUtil.java
@@ -0,0 +1,169 @@
+//  Copyright (C) 2020 The Android Open Source Project
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.google.gerrit.entities.Patch;
+import com.google.gerrit.entities.Patch.PatchType;
+import java.util.Optional;
+import org.eclipse.jgit.patch.CombinedFileHeader;
+import org.eclipse.jgit.patch.FileHeader;
+
+/** A utility class for the {@link FileHeader} JGit object */
+public class FileHeaderUtil {
+  private static final byte NUL = '\0'; // primitive, so the scan loop compares without unboxing
+
+  /**
+   * The maximum number of characters to lookup in the binary file {@link FileHeader}. This is used
+   * to scan the file header for the occurrence of the {@link #NUL} character.
+   *
+   * <p>This limit assumes a uniform distribution of all characters, hence the probability of the
+   * occurrence of each character = (1 / 256). We want to find the limit that makes the prob. of
+   * finding {@link #NUL} > 0.999. 1 - (255 / 256) ^ N > 0.999 yields N = 1766. The limit is set to
+   * roughly ten times this value (rounded up to 20000) for more confidence.
+   */
+  private static final int BIN_FILE_MAX_SCAN_LIMIT = 20000;
+
+  /** Converts the {@link FileHeader} parameter to a String representation (UTF-8 decoded). */
+  static String toString(FileHeader header) {
+    return new String(toByteArray(header), UTF_8);
+  }
+
+  /** Returns the header portion of the {@link FileHeader} parameter as a byte array. */
+  static byte[] toByteArray(FileHeader header) {
+    byte[] source = header.getBuffer();
+    int length = getEndOffset(header) - header.getStartOffset();
+    if (header.getStartOffset() == 0 && length == source.length) {
+      return source; // the header spans the whole buffer, which is returned without a copy
+    }
+    byte[] copy = new byte[length];
+    System.arraycopy(source, header.getStartOffset(), copy, 0, length);
+    return copy;
+  }
+
+  /**
+   * Returns the path that the file has on the old side of the {@link FileHeader}.
+   *
+   * @return the old path, or empty if the file is {@link Patch.ChangeType#ADDED} or {@link
+   *     Patch.ChangeType#REWRITE}.
+   */
+  static Optional<String> getOldPath(FileHeader header) {
+    switch (getChangeType(header)) {
+      case ADDED:
+      case REWRITE:
+        return Optional.empty();
+      case DELETED:
+      case COPIED:
+      case RENAMED:
+      case MODIFIED:
+        return Optional.of(header.getOldPath());
+    }
+    return Optional.empty();
+  }
+
+  /**
+   * Returns the path that the file has on the new side of the {@link FileHeader}.
+   *
+   * @return the new path, or empty if the file is {@link Patch.ChangeType#DELETED}.
+   */
+  static Optional<String> getNewPath(FileHeader header) {
+    switch (getChangeType(header)) {
+      case ADDED:
+      case MODIFIED:
+      case REWRITE:
+      case COPIED:
+      case RENAMED:
+        return Optional.of(header.getNewPath());
+      case DELETED:
+        // A deleted file has no path on the new side.
+        return Optional.empty();
+    }
+    return Optional.empty();
+  }
+
+  /** Maps the JGit change type of the file header to the Gerrit {@link Patch.ChangeType}. */
+  static Patch.ChangeType getChangeType(FileHeader header) {
+    // Gerrit declares its own counterparts of the JGit entities to keep full control over their
+    // behavior, e.g. to guarantee that they are immutable and hence usable as fields of the keys
+    // and values of persisted caches.
+    // TODO(ghareeb): remove the dead code of the value REWRITE and all its handling
+    org.eclipse.jgit.diff.DiffEntry.ChangeType jgitType = header.getChangeType();
+    switch (jgitType) {
+      case ADD:
+        return Patch.ChangeType.ADDED;
+      case MODIFY:
+        return Patch.ChangeType.MODIFIED;
+      case DELETE:
+        return Patch.ChangeType.DELETED;
+      case RENAME:
+        return Patch.ChangeType.RENAMED;
+      case COPY:
+        return Patch.ChangeType.COPIED;
+      default:
+        throw new IllegalArgumentException("Unsupported type " + jgitType);
+    }
+  }
+
+  /**
+   * Returns the patch type of the file header. Headers that JGit reports as unified are still
+   * classified as binary if a NUL byte occurs within the scanned prefix of the header.
+   */
+  static PatchType getPatchType(FileHeader header) {
+    switch (header.getPatchType()) {
+      case UNIFIED:
+        break;
+      case GIT_BINARY:
+      case BINARY:
+        return PatchType.BINARY;
+      default:
+        throw new IllegalArgumentException("Unsupported type " + header.getPatchType());
+    }
+
+    // TODO(ghareeb): should we adjust the max limit threshold?
+    // JGit sometimes misses the detection of binary files. In this case we look into the file
+    // header for the occurrence of NUL characters, which is a definite signal that the file is
+    // binary. We limit the number of characters to lookup to avoid performance bottlenecks.
+    //
+    // The limit is applied relative to the start offset of the header, so that a header that
+    // does not start at offset 0 of the buffer gets the same scan window.
+    byte[] buf = header.getBuffer();
+    int start = header.getStartOffset();
+    int scanEnd = start + Math.min(header.getEndOffset() - start, BIN_FILE_MAX_SCAN_LIMIT);
+    for (int ptr = start; ptr < scanEnd; ptr++) {
+      if (buf[ptr] == NUL) {
+        // It's really binary, but Git couldn't see the NUL early enough to realize it's binary,
+        // and instead produced the diff. Force it to be binary; it should have been that.
+        return PatchType.BINARY;
+      }
+    }
+
+    return PatchType.UNIFIED;
+  }
+
+  /**
+   * Returns the offset at which the diff header lines of the {@code fileHeader} parameter end,
+   * i.e. the offset before the appearance of any file edits (diff hunks).
+   */
+  private static int getEndOffset(FileHeader fileHeader) {
+    boolean combined = fileHeader instanceof CombinedFileHeader;
+    if (combined || fileHeader.getHunks().isEmpty()) {
+      // Combined headers and headers without hunks are taken up to their own end offset.
+      return fileHeader.getEndOffset();
+    }
+    // Otherwise the header lines end where the first hunk starts.
+    return fileHeader.getHunks().get(0).getStartOffset();
+  }
+}
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiff.java b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiff.java
new file mode 100644
index 0000000..de07ed1
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiff.java
@@ -0,0 +1,221 @@
+//  Copyright (C) 2020 The Android Open Source Project
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.gerrit.entities.Patch;
+import com.google.gerrit.entities.Patch.ChangeType;
+import com.google.gerrit.entities.Patch.PatchType;
+import com.google.gerrit.server.cache.serialize.CacheSerializer;
+import com.google.gerrit.server.patch.entities.Edit;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Optional;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.DiffFormatter;
+import org.eclipse.jgit.lib.AbbreviatedObjectId;
+import org.eclipse.jgit.lib.FileMode;
+import org.eclipse.jgit.patch.FileHeader;
+
+/**
+ * Entity representing a modified file (added, deleted, modified, renamed, etc...) between two
+ * different git commits.
+ */
+@AutoValue
+public abstract class GitFileDiff {
+  /** Maps each JGit {@link FileMode} to its Gerrit counterpart {@link Patch.FileMode}. */
+  private static final Map<FileMode, Patch.FileMode> fileModeMap =
+      ImmutableMap.<FileMode, Patch.FileMode>builder()
+          .put(FileMode.TREE, Patch.FileMode.TREE)
+          .put(FileMode.SYMLINK, Patch.FileMode.SYMLINK)
+          .put(FileMode.REGULAR_FILE, Patch.FileMode.REGULAR_FILE)
+          .put(FileMode.EXECUTABLE_FILE, Patch.FileMode.EXECUTABLE_FILE)
+          // Submodule commits: without this entry, mapFileMode() rejects every diff entry that
+          // touches a gitlink with an IllegalArgumentException.
+          .put(FileMode.GITLINK, Patch.FileMode.GITLINK)
+          .put(FileMode.MISSING, Patch.FileMode.MISSING)
+          .build();
+
+  /** Maps {@code jgitFileMode} to its Gerrit file mode, rejecting modes without a mapping. */
+  private static Patch.FileMode mapFileMode(FileMode jgitFileMode) {
+    Patch.FileMode mapped = fileModeMap.get(jgitFileMode);
+    return Optional.ofNullable(mapped)
+        .orElseThrow(() -> new IllegalArgumentException("Unsupported type " + jgitFileMode));
+  }
+
+  /**
+   * Creates a {@link GitFileDiff} for {@code diffEntry}. The file header, from which the edits,
+   * paths and types are derived, is computed by the {@code diffFormatter} parameter.
+   */
+  static GitFileDiff create(DiffEntry diffEntry, DiffFormatter diffFormatter) throws IOException {
+    FileHeader fileHeader = diffFormatter.toFileHeader(diffEntry);
+    ImmutableList<Edit> edits =
+        fileHeader.toEditList().stream().map(Edit::fromJGitEdit).collect(toImmutableList());
+
+    return builder()
+        .edits(edits)
+        .oldId(diffEntry.getOldId())
+        .newId(diffEntry.getNewId())
+        .fileHeader(FileHeaderUtil.toString(fileHeader))
+        .oldPath(FileHeaderUtil.getOldPath(fileHeader))
+        .newPath(FileHeaderUtil.getNewPath(fileHeader))
+        .changeType(FileHeaderUtil.getChangeType(fileHeader))
+        .patchType(FileHeaderUtil.getPatchType(fileHeader))
+        .oldMode(Optional.of(mapFileMode(diffEntry.getOldMode())))
+        .newMode(Optional.of(mapFileMode(diffEntry.getNewMode())))
+        .build();
+  }
+
+  /**
+   * Creates the diff of a file that was not modified between the two git trees identified by
+   * {@code oldId} and {@code newId}; it has no edits and an empty file header.
+   *
+   * @param newFilePath the file name at the {@code newId} git tree.
+   */
+  static GitFileDiff empty(
+      AbbreviatedObjectId oldId, AbbreviatedObjectId newId, String newFilePath) {
+    return builder()
+        .edits(ImmutableList.of())
+        .fileHeader("")
+        .oldId(oldId)
+        .newId(newId)
+        .newPath(Optional.of(newFilePath))
+        .build();
+  }
+
+  /** An {@link ImmutableList} of the modified regions in the file. */
+  public abstract ImmutableList<Edit> edits();
+
+  /** A string representation of the {@link org.eclipse.jgit.patch.FileHeader}. */
+  public abstract String fileHeader();
+
+  /** The file name at the old side of the diff; empty for added files. */
+  public abstract Optional<String> oldPath();
+
+  /** The file name at the new side of the diff; empty for deleted files. */
+  public abstract Optional<String> newPath();
+
+  /** The old object ID: taken from the diff entry by create(), the old tree ID for empty(). */
+  public abstract AbbreviatedObjectId oldId();
+
+  /** The new object ID: taken from the diff entry by create(), the new tree ID for empty(). */
+  public abstract AbbreviatedObjectId newId();
+
+  /** The file mode of the file at the old side of the diff. */
+  public abstract Optional<Patch.FileMode> oldMode();
+
+  /** The file mode of the file at the new side of the diff. */
+  public abstract Optional<Patch.FileMode> newMode();
+
+  /** The change type associated with the file. */
+  public abstract Optional<ChangeType> changeType();
+
+  /** The patch type associated with the file. */
+  public abstract Optional<PatchType> patchType();
+
+  /**
+   * Returns true if the diff has no edits, which holds for objects created with {@code empty()}.
+   * NOTE(review): {@code create()} may also yield no edits, e.g. for a pure rename - confirm.
+   */
+  public boolean isEmpty() {
+    return edits().isEmpty();
+  }
+
+  /**
+   * Returns the estimated size of the object in bytes: 20 bytes for each object ID, 16 bytes for
+   * each edit, 2 bytes for each string character and 4 bytes for each enum field that is set.
+   */
+  public int weight() {
+    int idsSize = 20 * 2; // oldId and newId
+    int editsSize = 16 * edits().size(); // each edit contains 4 integers (hence 16 bytes)
+
+    int stringsSize =
+        stringSize(fileHeader())
+            + stringSize(oldPath().orElse(null))
+            + stringSize(newPath().orElse(null));
+
+    // Every enum field that is present accounts for 4 bytes.
+    int enumsSize = 0;
+    ImmutableList<Optional<?>> enumFields =
+        ImmutableList.<Optional<?>>of(changeType(), patchType(), oldMode(), newMode());
+    for (Optional<?> enumField : enumFields) {
+      if (enumField.isPresent()) {
+        enumsSize += 4;
+      }
+    }
+
+    return idsSize + editsSize + stringsSize + enumsSize;
+  }
+
+  /**
+   * Returns the number of bytes occupied by the characters of {@code str}, or 0 if it is null.
+   *
+   * <p>Each character occupies two bytes. The fixed string overhead (length, offset and hash
+   * code) is ignored since it is negligible and does not affect the comparison of two strings.
+   */
+  private static int stringSize(String str) {
+    return str == null ? 0 : str.length() * 2;
+  }
+
+  public static Builder builder() {
+    return new AutoValue_GitFileDiff.Builder();
+  }
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+    // The Optional properties may be left unset, as done by GitFileDiff#empty for most of them.
+    public abstract Builder edits(ImmutableList<Edit> value);
+
+    public abstract Builder fileHeader(String value);
+
+    public abstract Builder oldPath(Optional<String> value);
+
+    public abstract Builder newPath(Optional<String> value);
+
+    public abstract Builder oldId(AbbreviatedObjectId value);
+
+    public abstract Builder newId(AbbreviatedObjectId value);
+
+    public abstract Builder oldMode(Optional<Patch.FileMode> value);
+
+    public abstract Builder newMode(Optional<Patch.FileMode> value);
+
+    public abstract Builder changeType(ChangeType value);
+
+    public abstract Builder patchType(PatchType value);
+
+    public abstract GitFileDiff build();
+  }
+
+  enum Serializer implements CacheSerializer<GitFileDiff> {
+    INSTANCE;
+
+    @Override
+    public byte[] serialize(GitFileDiff object) {
+      // TODO(ghareeb): not implemented yet, every object is serialized to an empty byte array.
+      return new byte[0];
+    }
+
+    @Override
+    public GitFileDiff deserialize(byte[] in) {
+      // TODO(ghareeb): not implemented yet, the input is ignored and null is returned.
+      return null;
+    }
+  }
+}
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCache.java b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCache.java
new file mode 100644
index 0000000..2516761
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCache.java
@@ -0,0 +1,43 @@
+//  Copyright (C) 2020 The Android Open Source Project
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.gerrit.server.patch.DiffNotAvailableException;
+
+/** This cache computes the pure git diff for a single file path according to a git tree diff. */
+public interface GitFileDiffCache {
+
+  /**
+   * Returns the git file diff for a single file path identified by its key.
+   *
+   * @param key identifies two git trees, a specific file path and other diff parameters.
+   * @return the file diff of the path, or an empty file diff if the two trees agree on the path.
+   * @throws DiffNotAvailableException if the tree IDs of the key are invalid for this project or if
+   *     file contents could not be read.
+   */
+  GitFileDiff get(GitFileDiffCacheKey key) throws DiffNotAvailableException;
+
+  /**
+   * Returns the file diff for a collection of file paths identified by their keys.
+   *
+   * @param keys the keys, which may identify file paths of different projects.
+   * @return a map of the input keys to their corresponding git file diffs.
+   * @throws DiffNotAvailableException if the diff failed to be evaluated for one or more of the
+   *     input keys due to invalid tree IDs or if file contents could not be read.
+   */
+  ImmutableMap<GitFileDiffCacheKey, GitFileDiff> getAll(Iterable<GitFileDiffCacheKey> keys)
+      throws DiffNotAvailableException;
+}
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheImpl.java b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheImpl.java
new file mode 100644
index 0000000..5af6424
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheImpl.java
@@ -0,0 +1,272 @@
+//  Copyright (C) 2020 The Android Open Source Project
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import static java.util.function.Function.identity;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Streams;
+import com.google.gerrit.entities.Project;
+import com.google.gerrit.extensions.client.DiffPreferencesInfo.Whitespace;
+import com.google.gerrit.server.cache.CacheModule;
+import com.google.gerrit.server.git.GitRepositoryManager;
+import com.google.gerrit.server.patch.DiffNotAvailableException;
+import com.google.inject.Inject;
+import com.google.inject.Module;
+import com.google.inject.name.Named;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.eclipse.jgit.diff.DiffEntry;
+import org.eclipse.jgit.diff.DiffEntry.ChangeType;
+import org.eclipse.jgit.diff.DiffFormatter;
+import org.eclipse.jgit.diff.HistogramDiff;
+import org.eclipse.jgit.diff.RawTextComparator;
+import org.eclipse.jgit.lib.AbbreviatedObjectId;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.util.io.DisabledOutputStream;
+
+/** Implementation of the {@link GitFileDiffCache} */
+public class GitFileDiffCacheImpl implements GitFileDiffCache {
+  private static final String GIT_DIFF = "git_file_diff";
+
+  public static Module module() {
+    return new CacheModule() {
+      @Override
+      protected void configure() {
+        bind(GitFileDiffCache.class).to(GitFileDiffCacheImpl.class);
+        persist(GIT_DIFF, GitFileDiffCacheKey.class, GitFileDiff.class)
+            .maximumWeight(10 << 20) // 10 MiB, in the bytes estimated by the weigher below
+            .weigher(GitFileDiffWeigher.class)
+            .valueSerializer(GitFileDiff.Serializer.INSTANCE) // NOTE(review): still a TODO stub
+            .loader(GitFileDiffCacheImpl.Loader.class);
+      }
+    };
+  }
+
+  /** The diff algorithms that are supported for the file diff computation. */
+  public enum DiffAlgorithm {
+    HISTOGRAM,
+    HISTOGRAM_WITHOUT_MYERS_FALLBACK
+  }
+
+  /** Creates the JGit diff algorithm instance for a value of Gerrit's {@link DiffAlgorithm}. */
+  public static class DiffAlgorithmFactory {
+    public static org.eclipse.jgit.diff.DiffAlgorithm create(DiffAlgorithm diffAlgorithm) {
+      HistogramDiff result = new HistogramDiff();
+      if (diffAlgorithm.equals(DiffAlgorithm.HISTOGRAM_WITHOUT_MYERS_FALLBACK)) {
+        result.setFallbackAlgorithm(null); // run the histogram diff without a fallback
+      }
+      return result;
+    }
+  }
+
+  private final LoadingCache<GitFileDiffCacheKey, GitFileDiff> cache; // the GIT_DIFF named cache
+
+  @Inject
+  public GitFileDiffCacheImpl(
+      @Named(GIT_DIFF) LoadingCache<GitFileDiffCacheKey, GitFileDiff> cache) {
+    this.cache = cache;
+  }
+
+  @Override
+  public GitFileDiff get(GitFileDiffCacheKey key) throws DiffNotAvailableException {
+    try {
+      return cache.get(key);
+    } catch (ExecutionException e) {
+      throw new DiffNotAvailableException(e); // the loader failed, e.g. with an IOException
+    }
+  }
+
+  @Override
+  public ImmutableMap<GitFileDiffCacheKey, GitFileDiff> getAll(Iterable<GitFileDiffCacheKey> keys)
+      throws DiffNotAvailableException {
+    try {
+      return cache.getAll(keys);
+    } catch (ExecutionException e) {
+      throw new DiffNotAvailableException(e); // the loader failed, e.g. with an IOException
+    }
+  }
+
+  static class Loader extends CacheLoader<GitFileDiffCacheKey, GitFileDiff> {
+    /**
+     * Maps a {@link DiffEntry} to the file path under which it is looked up: the old path for
+     * deleted files and the new path for all other entries.
+     */
+    private static final Function<DiffEntry, String> pathExtractor =
+        diffEntry -> {
+          boolean deleted = diffEntry.getChangeType().equals(ChangeType.DELETE);
+          return deleted ? diffEntry.getOldPath() : diffEntry.getNewPath();
+        };
+
+    private final GitRepositoryManager repoManager;
+
+    @Inject
+    public Loader(GitRepositoryManager repoManager) {
+      this.repoManager = repoManager;
+    }
+
+    @Override
+    public GitFileDiff load(GitFileDiffCacheKey key) throws IOException {
+      return loadAll(ImmutableList.of(key)).get(key); // single-key case of the bulk load
+    }
+
+    @Override
+    public Map<GitFileDiffCacheKey, GitFileDiff> loadAll(
+        Iterable<? extends GitFileDiffCacheKey> keys) throws IOException {
+      ImmutableMap.Builder<GitFileDiffCacheKey, GitFileDiff> diffs =
+          ImmutableMap.builderWithExpectedSize(Iterables.size(keys));
+
+      // A repository is opened only once for all the keys that belong to its project.
+      Map<Project.NameKey, List<GitFileDiffCacheKey>> keysByProject =
+          Streams.stream(keys)
+              .distinct()
+              .collect(Collectors.groupingBy(GitFileDiffCacheKey::project));
+
+      for (Project.NameKey project : keysByProject.keySet()) {
+        List<GitFileDiffCacheKey> projectKeys = keysByProject.get(project);
+        try (Repository repo = repoManager.openRepository(project);
+            ObjectReader reader = repo.newObjectReader()) {
+          // Keys are further grouped by their diff options, since every group is processed with
+          // a separate call to JGit using its own DiffFormatter object.
+          Map<DiffOptions, List<GitFileDiffCacheKey>> keysByOptions =
+              projectKeys.stream().collect(Collectors.groupingBy(DiffOptions::fromKey));
+          for (Map.Entry<DiffOptions, List<GitFileDiffCacheKey>> group : keysByOptions.entrySet()) {
+            diffs.putAll(loadAllImpl(repo, reader, group.getKey(), group.getValue()));
+          }
+        }
+      }
+      return diffs.build();
+    }
+
+    /**
+     * Loads the git file diffs of {@code keys}, which all belong to {@code repo} and share the
+     * same diff {@code options}.
+     *
+     * @return The git file diffs for all input keys.
+     */
+    private Map<GitFileDiffCacheKey, GitFileDiff> loadAllImpl(
+        Repository repo, ObjectReader reader, DiffOptions options, List<GitFileDiffCacheKey> keys)
+        throws IOException {
+      Map<GitFileDiffCacheKey, String> pathByKey =
+          keys.stream().collect(Collectors.toMap(identity(), GitFileDiffCacheKey::newFilePath));
+      DiffFormatter formatter = createDiffFormatter(options, repo, reader);
+      Map<String, DiffEntry> entryByPath = loadDiffEntries(formatter, options, pathByKey.values());
+
+      ImmutableMap.Builder<GitFileDiffCacheKey, GitFileDiff> diffs =
+          ImmutableMap.builderWithExpectedSize(keys.size());
+      for (Map.Entry<GitFileDiffCacheKey, String> keyAndPath : pathByKey.entrySet()) {
+        GitFileDiffCacheKey key = keyAndPath.getKey();
+        DiffEntry diffEntry = entryByPath.get(keyAndPath.getValue());
+        GitFileDiff diff =
+            diffEntry != null
+                ? GitFileDiff.create(diffEntry, formatter)
+                : GitFileDiff.empty(
+                    AbbreviatedObjectId.fromObjectId(key.oldTree()),
+                    AbbreviatedObjectId.fromObjectId(key.newTree()),
+                    keyAndPath.getValue());
+        diffs.put(key, diff);
+      }
+      return diffs.build();
+    }
+
+    private static Map<String, DiffEntry> loadDiffEntries(
+        DiffFormatter diffFormatter, DiffOptions diffOptions, Collection<String> filePaths)
+        throws IOException {
+      Set<String> wanted = ImmutableSet.copyOf(filePaths);
+      // A path can be reported twice (a deletion plus an addition/rename): keep the non-deletion.
+      return diffFormatter.scan(diffOptions.oldTree(), diffOptions.newTree()).stream()
+          .filter(d -> wanted.contains(pathExtractor.apply(d)))
+          .collect(
+              Collectors.toMap(
+                  pathExtractor, d -> d, (a, b) -> a.getChangeType() == ChangeType.DELETE ? b : a));
+    }
+
+    /** Creates the formatter for {@code diffOptions}; it reads objects through {@code reader}. */
+    private static DiffFormatter createDiffFormatter(
+        DiffOptions diffOptions, Repository repo, ObjectReader reader) {
+      // Not created in a try-with-resources block, which would return an already closed object.
+      DiffFormatter diffFormatter = new DiffFormatter(DisabledOutputStream.INSTANCE);
+      diffFormatter.setReader(reader, repo.getConfig());
+      diffFormatter.setDiffComparator(comparatorFor(diffOptions.whitespace()));
+      if (diffOptions.renameScore() != -1) {
+        diffFormatter.setDetectRenames(true);
+        diffFormatter.getRenameDetector().setRenameScore(diffOptions.renameScore());
+      }
+      diffFormatter.setDiffAlgorithm(DiffAlgorithmFactory.create(diffOptions.diffAlgorithm()));
+      return diffFormatter;
+    }
+
+    /** Returns the JGit text comparator that corresponds to the whitespace setting {@code ws}. */
+    private static RawTextComparator comparatorFor(Whitespace ws) {
+      switch (ws) {
+        case IGNORE_ALL:
+          return RawTextComparator.WS_IGNORE_ALL;
+        case IGNORE_TRAILING:
+          return RawTextComparator.WS_IGNORE_TRAILING;
+        case IGNORE_LEADING_AND_TRAILING:
+          // NOTE(review): leading-and-trailing is mapped to WS_IGNORE_CHANGE - confirm intent.
+          return RawTextComparator.WS_IGNORE_CHANGE;
+        case IGNORE_NONE:
+        default:
+          // IGNORE_NONE and any unlisted value fall back to the default comparator.
+          return RawTextComparator.DEFAULT;
+      }
+    }
+  }
+
+  /**
+   * The options affecting the diff computation: the fields of a {@link GitFileDiffCacheKey}
+   * except its project and file path. Keys of one project with equal options are loaded together.
+   */
+  @AutoValue
+  abstract static class DiffOptions {
+    /** Extracts the {@link DiffOptions} of a {@link GitFileDiffCacheKey}. */
+    static DiffOptions fromKey(GitFileDiffCacheKey key) {
+      ObjectId oldTree = key.oldTree();
+      ObjectId newTree = key.newTree();
+      Integer renameScore = key.renameScore();
+      Whitespace whitespace = key.whitespace();
+      DiffAlgorithm diffAlgorithm = key.diffAlgorithm();
+
+      return new AutoValue_GitFileDiffCacheImpl_DiffOptions(
+          oldTree, newTree, renameScore, whitespace, diffAlgorithm);
+    }
+
+    abstract ObjectId oldTree();
+
+    abstract ObjectId newTree();
+
+    /** The rename score; the value -1 keeps the rename detection disabled. */
+    abstract Integer renameScore();
+
+    abstract Whitespace whitespace();
+
+    abstract DiffAlgorithm diffAlgorithm();
+  }
+}
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheKey.java b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheKey.java
new file mode 100644
index 0000000..61250ef
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffCacheKey.java
@@ -0,0 +1,98 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import com.google.auto.value.AutoValue;
+import com.google.gerrit.entities.Project;
+import com.google.gerrit.entities.Project.NameKey;
+import com.google.gerrit.extensions.client.DiffPreferencesInfo;
+import com.google.gerrit.extensions.client.DiffPreferencesInfo.Whitespace;
+import com.google.gerrit.server.patch.gitfilediff.GitFileDiffCacheImpl.DiffAlgorithm;
+import org.eclipse.jgit.lib.ObjectId;
+
+// TODO(ghareeb): Implement a key protobuf serializer
+/** Cache key identifying the diff of a single file path between two git trees of a project. */
+@AutoValue
+public abstract class GitFileDiffCacheKey {
+  /** A specific git project / repository. */
+  public abstract Project.NameKey project();
+
+  /** The old 20 bytes SHA-1 git tree ID used in the git tree diff */
+  public abstract ObjectId oldTree();
+
+  /** The new 20 bytes SHA-1 git tree ID used in the git tree diff */
+  public abstract ObjectId newTree();
+
+  /** File name in the tree identified by {@link #newTree()} */
+  public abstract String newFilePath();
+
+  /**
+   * Percentage score used to identify a file as a "rename". A special value of -1 means that the
+   * computation will ignore renames and rename detection will be disabled.
+   */
+  public abstract int renameScore();
+
+  /** The algorithm used to compute the file diff. */
+  public abstract DiffAlgorithm diffAlgorithm();
+
+  /** The whitespace setting applied when the file contents are compared. */
+  public abstract DiffPreferencesInfo.Whitespace whitespace();
+
+  /** Returns the estimated size of the key in bytes. */
+  public int weight() {
+    int treeIdsSize = 20 * 2; // oldTree and newTree
+    int optionsSize = 4 * 3; // renameScore, diffAlgorithm and whitespace
+    return stringSize(project().get()) + stringSize(newFilePath()) + treeIdsSize + optionsSize;
+  }
+
+  /**
+   * Returns the number of bytes occupied by the characters of {@code str}, or 0 if it is null.
+   * Each character occupies 2 bytes; the fixed overhead of the string (length, offset and hash
+   * code) is ignored since it is negligible and does not affect the comparison of 2 strings.
+   */
+  private static int stringSize(String str) {
+    return str == null ? 0 : str.length() * 2;
+  }
+
+  public static Builder builder() {
+    return new AutoValue_GitFileDiffCacheKey.Builder();
+  }
+
+  /** Builder of {@link GitFileDiffCacheKey} objects. */
+  @AutoValue.Builder
+  public abstract static class Builder {
+
+    public abstract Builder project(NameKey value);
+
+    public abstract Builder oldTree(ObjectId value);
+
+    public abstract Builder newTree(ObjectId value);
+
+    public abstract Builder newFilePath(String value);
+
+    public abstract Builder renameScore(Integer value);
+
+    /** Disables the rename detection by setting the rename score to the special value -1. */
+    public Builder disableRenameDetection() {
+      return renameScore(-1);
+    }
+
+    public abstract Builder diffAlgorithm(DiffAlgorithm value);
+
+    public abstract Builder whitespace(Whitespace value);
+
+    public abstract GitFileDiffCacheKey build();
+  }
+}
diff --git a/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffWeigher.java b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffWeigher.java
new file mode 100644
index 0000000..47f7791
--- /dev/null
+++ b/java/com/google/gerrit/server/patch/gitfilediff/GitFileDiffWeigher.java
@@ -0,0 +1,25 @@
+//  Copyright (C) 2020 The Android Open Source Project
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+
+package com.google.gerrit.server.patch.gitfilediff;
+
+import com.google.common.cache.Weigher;
+
+/** Weighs the entries of the git file diff cache as the sum of the key and value weights. */
+public class GitFileDiffWeigher implements Weigher<GitFileDiffCacheKey, GitFileDiff> {
+  @Override
+  public int weigh(GitFileDiffCacheKey key, GitFileDiff gitFileDiff) {
+    return key.weight() + gitFileDiff.weight();
+  }
+}